sascha@162: #!/usr/bin/env python
sascha@162: 
sascha@162: import sys
sascha@162: import logging
sascha@162: import re
sascha@162: import os
sascha@162: 
sascha@162: import sqlite3 as db
sascha@162: import locale
sascha@162: import codecs
sascha@162: 
sascha@162: from optparse import OptionParser
sascha@162: 
sascha@162: log = logging.getLogger(__name__) 
sascha@162: log.setLevel(logging.WARNING)
sascha@162: log.addHandler(logging.StreamHandler(sys.stderr))
sascha@162: 
sascha@162: RANGE = re.compile("([^#]*)#(.*)")
sascha@162: DEFAULT_DATABASE = "flys.db"
sascha@162: 
sascha@162: SQL_NEXT_ID   = "SELECT coalesce(max(id), -1) + 1 FROM %s"
sascha@162: SQL_SELECT_ID = "SELECT id FROM %s WHERE %s = ?"
sascha@162: SQL_INSERT_ID = "INSERT INTO %s (id, %s) VALUES (?, ?)"
sascha@162: 
sascha@162: SQL_SELECT_RANGE_ID = """
sascha@162: SELECT id FROM ranges WHERE river_id = ? AND a = ? AND b = ?
sascha@162: """
sascha@162: SQL_INSERT_RANGE_ID = """
sascha@162: INSERT INTO ranges (id, river_id, a, b) VALUES (?, ?, ?, ?)
sascha@162: """
sascha@162: SQL_SELECT_ANNOTATION_ID = """
sascha@162: SELECT id FROM annotations
sascha@162: WHERE range_id = ? AND attribute_id = ? AND position_id = ?
sascha@162: """
sascha@162: SQL_INSERT_ANNOTATION_ID = """
sascha@162: INSERT INTO annotations (id, range_id, attribute_id, position_id) 
sascha@162: VALUES (?, ?, ?, ?)
sascha@162: """
sascha@162: 
sascha@162: def encode(s):
sascha@162:     try:
sascha@162:         return unicode(s, "latin-1")
sascha@162:     except UnicodeDecodeError:
sascha@162:         return unicode.encode(s, locale.getpreferredencoding())
sascha@162: 
sascha@162: class hashabledict(dict):
sascha@162:     def __key(self):
sascha@162:         return tuple((k, self[k]) for k in sorted(self))
sascha@162:     def __hash__(self):
sascha@162:         return hash(self.__key())
sascha@162:     def __eq__(self, other):
sascha@162:         return self.__key() == other.__key()
sascha@162: 
sascha@162: def cache(f):
sascha@162:     def func(*args, **kw):
sascha@162:         key = (args, hashabledict(kw))
sascha@162:         try:
sascha@162:             return f.__cache__[key]
sascha@162:         except KeyError:
sascha@162:             value = f(*args, **kw)
sascha@162:             f.__cache__[key] = value
sascha@162:             return value
sascha@162:     f.__cache__ = {}
sascha@162:     return func
sascha@162: 
sascha@162: NEXT_IDS = {}
sascha@162: def next_id(cur, relation):
sascha@162:     idx = NEXT_IDS.get(relation)
sascha@162:     if idx is None:
sascha@162:         cur.execute(SQL_NEXT_ID % relation)
sascha@162:         idx = cur.fetchone()[0]
sascha@162:     NEXT_IDS[relation] = idx + 1
sascha@162:     return idx
sascha@162: 
sascha@162: def get_id(cur, relation, attribute, value):
sascha@162:     select_stmt = SQL_SELECT_ID % (relation, attribute)
sascha@162:     #log.debug(select_stmt)
sascha@162:     cur.execute(select_stmt, (value,))
sascha@162:     row = cur.fetchone()
sascha@162:     if row: return row[0]
sascha@162:     idx = next_id(cur, relation)
sascha@162:     insert_stmnt = SQL_INSERT_ID % (relation, attribute)
sascha@162:     #log.debug(insert_stmnt)
sascha@162:     cur.execute(insert_stmnt, (idx, value))
sascha@162:     cur.connection.commit()
sascha@162:     log.debug("insert %s '%s' id: '%d'" % (relation, value, idx))
sascha@162:     return idx
sascha@162: 
sascha@162: #@cache
sascha@162: def get_river_id(cur, name):
sascha@162:     return get_id(cur, "rivers", "name", name)
sascha@162: 
sascha@162: #@cache
sascha@162: def get_attribute_id(cur, value):
sascha@162:     return get_id(cur, "attributes", "value", value)
sascha@162: 
sascha@162: #@cache
sascha@162: def get_position_id(cur, value):
sascha@162:     return get_id(cur, "positions", "value", value)
sascha@162: 
sascha@162: #@cache
sascha@162: def get_range_id(cur, river_id, a, b):
sascha@162:     cur.execute(SQL_SELECT_RANGE_ID, (river_id, a, b))
sascha@162:     row = cur.fetchone()
sascha@162:     if row: return row[0]
sascha@162:     idx = next_id(cur, "ranges")
sascha@162:     cur.execute(SQL_INSERT_RANGE_ID, (idx, river_id, a, b))
sascha@162:     cur.connection.commit()
sascha@162:     return idx
sascha@162: 
sascha@162: #@cache
sascha@162: def get_annotation_id(cur, range_id, attribute_id, position_id):
sascha@162:     cur.execute(SQL_SELECT_ANNOTATION_ID, (
sascha@162:         range_id, attribute_id, position_id))
sascha@162:     row = cur.fetchone()
sascha@162:     if row: return row[0]
sascha@162:     idx = next_id(cur, "annotations")
sascha@162:     cur.execute(SQL_INSERT_ANNOTATION_ID, (
sascha@162:         idx, range_id, attribute_id, position_id))
sascha@162:     cur.connection.commit()
sascha@162:     return idx
sascha@162: 
sascha@162: def files(root, accept=lambda x: True):
sascha@162:     if os.path.isfile(root):
sascha@162:         if accept(root): yield root
sascha@162:     elif os.path.isdir(root):
sascha@162:         stack = [ root ]
sascha@162:         while stack:
sascha@162:             cur = stack.pop()
sascha@162:             for f in os.listdir(cur):
sascha@162:                 p = os.path.join(cur, f)
sascha@162:                 if os.path.isdir(p):
sascha@162:                     stack.append(p)
sascha@162:                 elif os.path.isfile(p) and accept(p):
sascha@162:                     yield p
sascha@162: 
sascha@162: def feed_km(cur, river_id, km_file):
sascha@162: 
sascha@162:     log.info("processing: %s" % km_file)
sascha@162: 
sascha@162:     for line in codecs.open(km_file, "r", "latin-1"):
sascha@162:         line = line.strip()
sascha@162:         if not line or line.startswith('*'):
sascha@162:             continue
sascha@162:         parts = [x.strip() for x in line.split(';')]
sascha@162:         if len(parts) < 3:
sascha@162:             log.error("cannot process: '%s'" % line)
sascha@162:             continue
sascha@162:         m = RANGE.match(parts[2])
sascha@162:         try:
sascha@162:             if m:
sascha@162:                 x = [float(x.replace(",", ".")) for x in m.groups()]
sascha@162:                 a, b = min(x), max(x)
sascha@162:                 if a == b: b = None
sascha@162:             else:
sascha@162:                 a, b = float(parts[2].replace(",", ".")), None
sascha@162:         except ValueError:
sascha@162:             log.error("cannot process: '%s'" % line)
sascha@162:             continue
sascha@162: 
sascha@162:         attribute = parts[0]
sascha@162:         position  = parts[1]
sascha@162:         attribute_id = get_attribute_id(cur, attribute) if attribute else None
sascha@162:         position_id  = get_position_id(cur, position) if position else None
sascha@162: 
sascha@162:         range_id = get_range_id(cur, river_id, a, b)
sascha@162: 
sascha@162:         get_annotation_id(cur, range_id, attribute_id, position_id)
sascha@162: 
sascha@162: def main():
sascha@162: 
sascha@162:     usage = "usage: %prog [options] river km-file ..."
sascha@162:     parser = OptionParser(usage=usage)
sascha@162:     parser.add_option(
sascha@162:         "-v", "--verbose", action="store_true",
sascha@162:         dest="verbose",
sascha@162:         help="verbose output")
sascha@162:     parser.add_option(
sascha@162:         "-r", "--recursive", action="store_true",
sascha@162:         dest="recursive", default=False,
sascha@162:         help="recursive")
sascha@162:     parser.add_option(
sascha@162:         "-d", "--database", action="store",
sascha@162:         dest="database",
sascha@162:         help="database to connect with",
sascha@162:         default=DEFAULT_DATABASE)
sascha@162: 
sascha@162:     options, args = parser.parse_args()
sascha@162: 
sascha@162:     if options.verbose:
sascha@162:         log.setLevel(logging.INFO)
sascha@162:     
sascha@162:     if len(args) < 1:
sascha@162:         log.error("missing river argument")
sascha@162:         sys.exit(1)
sascha@162: 
sascha@162:     river = unicode(args[0], locale.getpreferredencoding())
sascha@162: 
sascha@162:     with db.connect(options.database) as con:
sascha@162:         cur = con.cursor()
sascha@162:         river_id = get_river_id(cur, river)
sascha@162: 
sascha@162:         for arg in args[1:]:
sascha@162:             if options.recursive:
sascha@162:                 for km_file in files(
sascha@162:                     arg, lambda x: x.lower().endswith(".km")):
sascha@162:                     feed_km(cur, river_id, km_file)
sascha@162:             else:
sascha@162:                 feed_km(cur, river_id, arg)
sascha@162:         
sascha@162: 
sascha@162: if __name__ == '__main__':
sascha@162:     main()