Mercurial > dive4elements > river
view artifacts/contrib/add-i18n-numbers.py @ 6110:5ba28b563614
Fix datacage for user data by adding facet filtering.
Now the facets are joined in the user data select statement
and afterwards we just filter on the facet name where previously
new queryies were created.
Grouping also fixes the repeating folders caused by the change
in the statements during the large perfomance cleanup.
author | Andre Heinecke <aheinecke@intevation.de> |
---|---|
date | Mon, 27 May 2013 15:36:56 +0200 |
parents | 5aa05a7a34b7 |
children |
line wrap: on
line source
#!/usr/bin/env python """ Add unique numbers in front of properties values to identfy the key without knowing the real key. """ import sys import re import os BLACK_LISTED_KEYS = [ re.compile(r".*\.file$") ] BLACK_LISTED_VALUES = [ re.compile(r"^http.*$") ] NUMBERED = re.compile(r"^\s*([^\s]+)\s*=\s*\[([0-9a-zA-Z]+)\]\s*(.+)$") UNUMBERED = re.compile(r"^\s*([^\s]+)\s*=\s*(.+)$") ALPHA = "0123456789" \ "abcdefghijklmnopqrstuvwxyz" \ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" def decode_ibase62(s): t, c = 0, 1 for x in s[::-1]: i = ALPHA.find(x) t += i*c c *= len(ALPHA) return t def ibase62(i): if i == 0: return "0" out = [] if i < 0: out.append("-") i = -1 while i > 0: out.append(ALPHA[i % len(ALPHA)]) i //= len(ALPHA) out.reverse() return ''.join(out) def is_blacklisted(key, value): for bl in BLACK_LISTED_KEYS: if bl.match(key): return True for bl in BLACK_LISTED_VALUES: if bl.match(value): return True return False def find_key(already_numbered, value): for k, v in already_numbered.iteritems(): if v == value: return k return None def decorated_content(infile, outfile, already_numbered): for line in infile: line = line.strip() m = NUMBERED.match(line) if m: key, num, value = m.groups() decoded_num = decode_ibase62(num) last = find_key(already_numbered, decoded_num) if last is None: already_numbered[key] = decoded_num elif last != key: print >> sys.stderr, "WARN: Number clash: " \ "%s leeds to '%s' and '%s'" % (num, key, last) print >> outfile, line continue m = UNUMBERED.match(line) if m: key, value = m.groups(1) if is_blacklisted(key, value): print >> outfile, line else: num = already_numbered.setdefault(key, len(already_numbered)) print >> outfile, "%s=[%s] %s" % (key, ibase62(num), m.group(2)) continue print >> outfile, line def tmp_fname(fname): name = fname + ".tmp" i = 0 while os.path.exists(name): name = "%s.tmp%d" % (fname, i) i += 1 return name def decorate_file(fname, already_numbered): tmp = tmp_fname(fname) with open(fname, "r") as infile: with open(tmp, "w") as outfile: decorated_content(infile, outfile, already_numbered) os.rename(tmp, fname) def main(): already_numbered = {} for fname in sys.argv[1:]: print >> sys.stderr, "checking %s" % fname decorate_file(fname, already_numbered) if __name__ == "__main__": main()