Mercurial > dive4elements > river
comparison flys-artifacts/contrib/add-i18n-numbers.py @ 3651:06a65baae494
merged flys-artifacts/2.9
author | Thomas Arendsen Hein <thomas@intevation.de> |
---|---|
date | Fri, 28 Sep 2012 12:14:43 +0200 |
parents | 003940a7d6c9 |
children |
comparison
equal
deleted
inserted
replaced
3549:6a8f83c538e3 | 3651:06a65baae494 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ Add unique numbers in front of properties values | |
4 to identify the key without knowing the real key. | |
5 """ | |
6 | |
7 import sys | |
8 import re | |
9 import os | |
10 | |
# Properties whose key matches one of these patterns are never numbered
# (currently: every key ending in ".file").
BLACK_LISTED_KEYS = [re.compile(r".*\.file$")]

# Properties whose value matches one of these patterns are never numbered
# (currently: every value starting with "http").
BLACK_LISTED_VALUES = [re.compile(r"^http.*$")]

# "key = [num] value" -- a property whose value already carries a number
# tag.  Groups: key, number (alphanumeric), remaining value.
NUMBERED = re.compile(r"^\s*([^\s]+)\s*=\s*\[([0-9a-zA-Z]+)\]\s*(.+)$")

# "key = value" -- any other property line.  Groups: key, value.
UNUMBERED = re.compile(r"^\s*([^\s]+)\s*=\s*(.+)$")
21 | |
# Digit alphabet of the base-62 notation used for the number tags:
# decimal digits first, then lower-case, then upper-case letters.
ALPHA = "0123456789" \
        "abcdefghijklmnopqrstuvwxyz" \
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


def decode_ibase62(s):
    """Convert a base-62 string (as produced by ibase62) into an integer.

    s -- string of characters from ALPHA, optionally preceded by "-".
         The empty string decodes to 0.

    Raises ValueError if s contains a character that is not in ALPHA;
    previously such characters were silently folded into the result
    as the digit value -1.
    """
    negative = s.startswith("-")
    digits = s[1:] if negative else s
    base = len(ALPHA)
    total = 0
    # Horner scheme: most significant digit first.
    for ch in digits:
        value = ALPHA.find(ch)
        if value < 0:
            raise ValueError("invalid base-62 digit %r in %r" % (ch, s))
        total = total * base + value
    return -total if negative else total


def ibase62(i):
    """Convert the integer i into its base-62 string over ALPHA.

    Zero yields "0".  Negative numbers yield "-" followed by the
    encoding of the absolute value, so that decode_ibase62 can read
    the result back.
    """
    if i == 0:
        return "0"
    negative = i < 0
    if negative:
        # Encode the magnitude; the sign is added at the end.
        i = -i
    base = len(ALPHA)
    out = []
    while i > 0:
        i, rem = divmod(i, base)
        out.append(ALPHA[rem])
    if negative:
        # Appended last so it ends up in front after the reversal below.
        out.append("-")
    out.reverse()
    return ''.join(out)
46 | |
def is_blacklisted(key, value):
    """Tell whether a property must be left without a number.

    Returns True if key matches one of BLACK_LISTED_KEYS or value
    matches one of BLACK_LISTED_VALUES, False otherwise.
    """
    if any(pattern.match(key) for pattern in BLACK_LISTED_KEYS):
        return True
    return any(pattern.match(value) for pattern in BLACK_LISTED_VALUES)
58 | |
def find_key(already_numbered, value):
    """Reverse lookup: return a key of already_numbered mapped to value.

    already_numbered -- dict mapping property keys to their numbers.
    value            -- the number to look for.

    Returns the first matching key in the dict's iteration order,
    or None if no key carries that number.
    """
    # items() instead of iteritems(): the latter exists only in Python 2.
    return next(
        (key for key, number in already_numbered.items() if number == value),
        None)
64 | |
def decorated_content(infile, outfile, already_numbered):
    """Copy infile to outfile, tagging property values with numbers.

    infile           -- iterable of text lines.
    outfile          -- object with a write() method.
    already_numbered -- dict mapping property keys to numbers; it is
                        updated in place with every key seen here.

    Every line is stripped of surrounding whitespace and written back
    followed by a newline:

    * "key = [num] value" lines are written unchanged.  Their number is
      recorded for the key; if the number already belongs to a different
      key a warning is written to stderr.
    * "key = value" lines are rewritten as "key=[num] value", unless the
      property is blacklisted.  A key that is already known reuses its
      number; a new key gets the smallest number not below
      len(already_numbered) that is not in use yet.
    * All other lines are written unchanged.
    """
    for line in infile:
        line = line.strip()

        m = NUMBERED.match(line)
        if m:
            key, num, _value = m.groups()
            decoded_num = decode_ibase62(num)
            last = find_key(already_numbered, decoded_num)
            if last is None:
                already_numbered[key] = decoded_num
            elif last != key:
                sys.stderr.write(
                    "WARN: Number clash: "
                    "%s leeds to '%s' and '%s'\n" % (num, key, last))
            outfile.write(line + "\n")
            continue

        m = UNUMBERED.match(line)
        if m:
            key, value = m.groups()
            if not is_blacklisted(key, value):
                num = already_numbered.get(key)
                if num is None:
                    # len(already_numbered) alone is not necessarily free:
                    # numbers read from existing "[num]" tags may be
                    # arbitrary, so skip over everything already taken.
                    used = set(already_numbered.values())
                    num = len(already_numbered)
                    while num in used:
                        num += 1
                    already_numbered[key] = num
                line = "%s=[%s] %s" % (key, ibase62(num), value)

        outfile.write(line + "\n")
92 | |
def tmp_fname(fname):
    """Return a path next to fname that does not exist yet.

    The first candidate is fname + ".tmp"; if that exists,
    fname + ".tmp0", fname + ".tmp1", ... are tried in turn.
    """
    candidate = fname + ".tmp"
    if not os.path.exists(candidate):
        return candidate

    counter = 0
    while True:
        candidate = "%s.tmp%d" % (fname, counter)
        if not os.path.exists(candidate):
            return candidate
        counter += 1
100 | |
def decorate_file(fname, already_numbered):
    """Rewrite the file fname in place with numbered property values.

    The decorated content is first written to a temporary file next to
    fname (see tmp_fname), which is then renamed over the original.

    fname            -- path of the properties file to rewrite.
    already_numbered -- dict mapping keys to numbers, passed on to
                        decorated_content, which updates it.

    If anything fails before the rename has completed, the temporary
    file is removed again and the exception is propagated.
    """
    tmp = tmp_fname(fname)
    replaced = False
    try:
        with open(fname, "r") as infile:
            with open(tmp, "w") as outfile:
                decorated_content(infile, outfile, already_numbered)
        os.rename(tmp, fname)
        replaced = True
    finally:
        # Do not leave a half-written temporary file behind on errors.
        if not replaced and os.path.exists(tmp):
            os.remove(tmp)
110 | |
def main():
    """Decorate every file named on the command line.

    All files share one key -> number mapping, so a key that occurs in
    several files is looked up in the same dict for each of them.
    Progress is reported on stderr.
    """
    already_numbered = {}
    for fname in sys.argv[1:]:
        # write() instead of the Python 2-only "print >>" statement.
        sys.stderr.write("checking %s\n" % fname)
        decorate_file(fname, already_numbered)


if __name__ == "__main__":
    main()