Mercurial > dive4elements > river
comparison flys-backend/contrib/import-kms.py @ 162:80669241956c
Initial database import scripts. Not finished, yet.
flys-backend/trunk@1333 c6561f87-3c4e-4783-a992-168aeb5c3f6f
author | Sascha L. Teichmann <sascha.teichmann@intevation.de> |
---|---|
date | Tue, 22 Feb 2011 10:34:53 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
161:119048655872 | 162:80669241956c |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 import logging | |
5 import re | |
6 import os | |
7 | |
8 import sqlite3 as db | |
9 import locale | |
10 import codecs | |
11 | |
12 from optparse import OptionParser | |
13 | |
14 log = logging.getLogger(__name__) | |
15 log.setLevel(logging.WARNING) | |
16 log.addHandler(logging.StreamHandler(sys.stderr)) | |
17 | |
18 RANGE = re.compile("([^#]*)#(.*)") | |
19 DEFAULT_DATABASE = "flys.db" | |
20 | |
21 SQL_NEXT_ID = "SELECT coalesce(max(id), -1) + 1 FROM %s" | |
22 SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?" | |
23 SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)" | |
24 | |
25 SQL_SELECT_RANGE_ID = """ | |
26 SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ? | |
27 """ | |
28 SQL_INSERT_RANGE_ID = """ | |
29 INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?) | |
30 """ | |
31 SQL_SELECT_ANNOTATION_ID = """ | |
32 SELECT id FROM annotations | |
33 WHERE range_id = ? AND attribute_id = ? AND position_id = ? | |
34 """ | |
35 SQL_INSERT_ANNOTATION_ID = """ | |
36 INSERT INTO annotations (id, range_id, attribute_id, position_id) | |
37 VALUES (?, ?, ?, ?) | |
38 """ | |
39 | |
40 def encode(s): | |
41 try: | |
42 return unicode(s, "latin-1") | |
43 except UnicodeDecodeError: | |
44 return unicode.encode(s, locale.getpreferredencoding()) | |
45 | |
46 class hashabledict(dict): | |
47 def __key(self): | |
48 return tuple((k, self[k]) for k in sorted(self)) | |
49 def __hash__(self): | |
50 return hash(self.__key()) | |
51 def __eq__(self, other): | |
52 return self.__key() == other.__key() | |
53 | |
54 def cache(f): | |
55 def func(*args, **kw): | |
56 key = (args, hashabledict(kw)) | |
57 try: | |
58 return f.__cache__[key] | |
59 except KeyError: | |
60 value = f(*args, **kw) | |
61 f.__cache__[key] = value | |
62 return value | |
63 f.__cache__ = {} | |
64 return func | |
65 | |
66 NEXT_IDS = {} | |
67 def next_id(cur, relation): | |
68 idx = NEXT_IDS.get(relation) | |
69 if idx is None: | |
70 cur.execute(SQL_NEXT_ID % relation) | |
71 idx = cur.fetchone()[0] | |
72 NEXT_IDS[relation] = idx + 1 | |
73 return idx | |
74 | |
75 def get_id(cur, relation, attribute, value): | |
76 select_stmt = SQL_SELECT_ID % (relation, attribute) | |
77 #log.debug(select_stmt) | |
78 cur.execute(select_stmt, (value,)) | |
79 row = cur.fetchone() | |
80 if row: return row[0] | |
81 idx = next_id(cur, relation) | |
82 insert_stmnt = SQL_INSERT_ID % (relation, attribute) | |
83 #log.debug(insert_stmnt) | |
84 cur.execute(insert_stmnt, (idx, value)) | |
85 cur.connection.commit() | |
86 log.debug("insert %s '%s' id: '%d'" % (relation, value, idx)) | |
87 return idx | |
88 | |
89 #@cache | |
90 def get_river_id(cur, name): | |
91 return get_id(cur, "rivers", "name", name) | |
92 | |
93 #@cache | |
94 def get_attribute_id(cur, value): | |
95 return get_id(cur, "attributes", "value", value) | |
96 | |
97 #@cache | |
98 def get_position_id(cur, value): | |
99 return get_id(cur, "positions", "value", value) | |
100 | |
101 #@cache | |
102 def get_range_id(cur, river_id, a, b): | |
103 cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b)) | |
104 row = cur.fetchone() | |
105 if row: return row[0] | |
106 idx = next_id(cur, "ranges") | |
107 cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b)) | |
108 cur.connection.commit() | |
109 return idx | |
110 | |
111 #@cache | |
112 def get_annotation_id(cur, range_id, attribute_id, position_id): | |
113 cur.execute(SQL_SELECT_ANNOTATION_ID, ( | |
114 range_id, attribute_id, position_id)) | |
115 row = cur.fetchone() | |
116 if row: return row[0] | |
117 idx = next_id(cur, "annotations") | |
118 cur.execute(SQL_INSERT_ANNOTATION_ID, ( | |
119 idx, range_id, attribute_id, position_id)) | |
120 cur.connection.commit() | |
121 return idx | |
122 | |
123 def files(root, accept=lambda x: True): | |
124 if os.path.isfile(root): | |
125 if accept(root): yield root | |
126 elif os.path.isdir(root): | |
127 stack = [ root ] | |
128 while stack: | |
129 cur = stack.pop() | |
130 for f in os.listdir(cur): | |
131 p = os.path.join(cur, f) | |
132 if os.path.isdir(p): | |
133 stack.append(p) | |
134 elif os.path.isfile(p) and accept(p): | |
135 yield p | |
136 | |
137 def feed_km(cur, river_id, km_file): | |
138 | |
139 log.info("processing: %s" % km_file) | |
140 | |
141 for line in codecs.open(km_file, "r", "latin-1"): | |
142 line = line.strip() | |
143 if not line or line.startswith('*'): | |
144 continue | |
145 parts = [x.strip() for x in line.split(';')] | |
146 if len(parts) < 3: | |
147 log.error("cannot process: '%s'" % line) | |
148 continue | |
149 m = RANGE.match(parts[2]) | |
150 try: | |
151 if m: | |
152 x = [float(x.replace(",", ".")) for x in m.groups()] | |
153 a, b = min(x), max(x) | |
154 if a == b: b = None | |
155 else: | |
156 a, b = float(parts[2].replace(",", ".")), None | |
157 except ValueError: | |
158 log.error("cannot process: '%s'" % line) | |
159 continue | |
160 | |
161 attribute = parts[0] | |
162 position = parts[1] | |
163 attribute_id = get_attribute_id(cur, attribute) if attribute else None | |
164 position_id = get_position_id(cur, position) if position else None | |
165 | |
166 range_id = get_range_id(cur, river_id, a, b) | |
167 | |
168 get_annotation_id(cur, range_id, attribute_id, position_id) | |
169 | |
170 def main(): | |
171 | |
172 usage = "usage: %prog [options] river km-file ..." | |
173 parser = OptionParser(usage=usage) | |
174 parser.add_option( | |
175 "-v", "--verbose", action="store_true", | |
176 dest="verbose", | |
177 help="verbose output") | |
178 parser.add_option( | |
179 "-r", "--recursive", action="store_true", | |
180 dest="recursive", default=False, | |
181 help="recursive") | |
182 parser.add_option( | |
183 "-d", "--database", action="store", | |
184 dest="database", | |
185 help="database to connect with", | |
186 default=DEFAULT_DATABASE) | |
187 | |
188 options, args = parser.parse_args() | |
189 | |
190 if options.verbose: | |
191 log.setLevel(logging.INFO) | |
192 | |
193 if len(args) < 1: | |
194 log.error("missing river argument") | |
195 sys.exit(1) | |
196 | |
197 river = unicode(args[0], locale.getpreferredencoding()) | |
198 | |
199 with db.connect(options.database) as con: | |
200 cur = con.cursor() | |
201 river_id = get_river_id(cur, river) | |
202 | |
203 for arg in args[1:]: | |
204 if options.recursive: | |
205 for km_file in files( | |
206 arg, lambda x: x.lower().endswith(".km")): | |
207 feed_km(cur, river_id, km_file) | |
208 else: | |
209 feed_km(cur, river_id, arg) | |
210 | |
211 | |
212 if __name__ == '__main__': | |
213 main() |