comparison flys-backend/contrib/import-kms.py @ 1259:54365104835c 2.5

merged flys-backend/2.5
author Thomas Arendsen Hein <thomas@intevation.de>
date Fri, 28 Sep 2012 12:14:18 +0200
parents 80669241956c
children
comparison
equal deleted inserted replaced
1190:f514894ec2fd 1259:54365104835c
1 #!/usr/bin/env python
2
3 import sys
4 import logging
5 import re
6 import os
7
8 import sqlite3 as db
9 import locale
10 import codecs
11
12 from optparse import OptionParser
13
14 log = logging.getLogger(__name__)
15 log.setLevel(logging.WARNING)
16 log.addHandler(logging.StreamHandler(sys.stderr))
17
18 RANGE = re.compile("([^#]*)#(.*)")
19 DEFAULT_DATABASE = "flys.db"
20
21 SQL_NEXT_ID = "SELECT coalesce(max(id), -1) + 1 FROM %s"
22 SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?"
23 SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)"
24
25 SQL_SELECT_RANGE_ID = """
26 SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ?
27 """
28 SQL_INSERT_RANGE_ID = """
29 INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?)
30 """
31 SQL_SELECT_ANNOTATION_ID = """
32 SELECT id FROM annotations
33 WHERE range_id = ? AND attribute_id = ? AND position_id = ?
34 """
35 SQL_INSERT_ANNOTATION_ID = """
36 INSERT INTO annotations (id, range_id, attribute_id, position_id)
37 VALUES (?, ?, ?, ?)
38 """
39
40 def encode(s):
41 try:
42 return unicode(s, "latin-1")
43 except UnicodeDecodeError:
44 return unicode.encode(s, locale.getpreferredencoding())
45
46 class hashabledict(dict):
47 def __key(self):
48 return tuple((k, self[k]) for k in sorted(self))
49 def __hash__(self):
50 return hash(self.__key())
51 def __eq__(self, other):
52 return self.__key() == other.__key()
53
54 def cache(f):
55 def func(*args, **kw):
56 key = (args, hashabledict(kw))
57 try:
58 return f.__cache__[key]
59 except KeyError:
60 value = f(*args, **kw)
61 f.__cache__[key] = value
62 return value
63 f.__cache__ = {}
64 return func
65
66 NEXT_IDS = {}
67 def next_id(cur, relation):
68 idx = NEXT_IDS.get(relation)
69 if idx is None:
70 cur.execute(SQL_NEXT_ID % relation)
71 idx = cur.fetchone()[0]
72 NEXT_IDS[relation] = idx + 1
73 return idx
74
75 def get_id(cur, relation, attribute, value):
76 select_stmt = SQL_SELECT_ID % (relation, attribute)
77 #log.debug(select_stmt)
78 cur.execute(select_stmt, (value,))
79 row = cur.fetchone()
80 if row: return row[0]
81 idx = next_id(cur, relation)
82 insert_stmnt = SQL_INSERT_ID % (relation, attribute)
83 #log.debug(insert_stmnt)
84 cur.execute(insert_stmnt, (idx, value))
85 cur.connection.commit()
86 log.debug("insert %s '%s' id: '%d'" % (relation, value, idx))
87 return idx
88
89 #@cache
90 def get_river_id(cur, name):
91 return get_id(cur, "rivers", "name", name)
92
93 #@cache
94 def get_attribute_id(cur, value):
95 return get_id(cur, "attributes", "value", value)
96
97 #@cache
98 def get_position_id(cur, value):
99 return get_id(cur, "positions", "value", value)
100
101 #@cache
102 def get_range_id(cur, river_id, a, b):
103 cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b))
104 row = cur.fetchone()
105 if row: return row[0]
106 idx = next_id(cur, "ranges")
107 cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b))
108 cur.connection.commit()
109 return idx
110
111 #@cache
112 def get_annotation_id(cur, range_id, attribute_id, position_id):
113 cur.execute(SQL_SELECT_ANNOTATION_ID, (
114 range_id, attribute_id, position_id))
115 row = cur.fetchone()
116 if row: return row[0]
117 idx = next_id(cur, "annotations")
118 cur.execute(SQL_INSERT_ANNOTATION_ID, (
119 idx, range_id, attribute_id, position_id))
120 cur.connection.commit()
121 return idx
122
123 def files(root, accept=lambda x: True):
124 if os.path.isfile(root):
125 if accept(root): yield root
126 elif os.path.isdir(root):
127 stack = [ root ]
128 while stack:
129 cur = stack.pop()
130 for f in os.listdir(cur):
131 p = os.path.join(cur, f)
132 if os.path.isdir(p):
133 stack.append(p)
134 elif os.path.isfile(p) and accept(p):
135 yield p
136
137 def feed_km(cur, river_id, km_file):
138
139 log.info("processing: %s" % km_file)
140
141 for line in codecs.open(km_file, "r", "latin-1"):
142 line = line.strip()
143 if not line or line.startswith('*'):
144 continue
145 parts = [x.strip() for x in line.split(';')]
146 if len(parts) < 3:
147 log.error("cannot process: '%s'" % line)
148 continue
149 m = RANGE.match(parts[2])
150 try:
151 if m:
152 x = [float(x.replace(",", ".")) for x in m.groups()]
153 a, b = min(x), max(x)
154 if a == b: b = None
155 else:
156 a, b = float(parts[2].replace(",", ".")), None
157 except ValueError:
158 log.error("cannot process: '%s'" % line)
159 continue
160
161 attribute = parts[0]
162 position = parts[1]
163 attribute_id = get_attribute_id(cur, attribute) if attribute else None
164 position_id = get_position_id(cur, position) if position else None
165
166 range_id = get_range_id(cur, river_id, a, b)
167
168 get_annotation_id(cur, range_id, attribute_id, position_id)
169
170 def main():
171
172 usage = "usage: %prog [options] river km-file ..."
173 parser = OptionParser(usage=usage)
174 parser.add_option(
175 "-v", "--verbose", action="store_true",
176 dest="verbose",
177 help="verbose output")
178 parser.add_option(
179 "-r", "--recursive", action="store_true",
180 dest="recursive", default=False,
181 help="recursive")
182 parser.add_option(
183 "-d", "--database", action="store",
184 dest="database",
185 help="database to connect with",
186 default=DEFAULT_DATABASE)
187
188 options, args = parser.parse_args()
189
190 if options.verbose:
191 log.setLevel(logging.INFO)
192
193 if len(args) < 1:
194 log.error("missing river argument")
195 sys.exit(1)
196
197 river = unicode(args[0], locale.getpreferredencoding())
198
199 with db.connect(options.database) as con:
200 cur = con.cursor()
201 river_id = get_river_id(cur, river)
202
203 for arg in args[1:]:
204 if options.recursive:
205 for km_file in files(
206 arg, lambda x: x.lower().endswith(".km")):
207 feed_km(cur, river_id, km_file)
208 else:
209 feed_km(cur, river_id, arg)
210
211
212 if __name__ == '__main__':
213 main()

http://dive4elements.wald.intevation.org