Mercurial > ppgen
annotate ppgen.py @ 9:35c468a37b54
Extend command line options to select 2nd language
author | Bernhard Reiter <bernhard@intevation.de> |
---|---|
date | Wed, 02 Oct 2019 15:04:28 +0200 |
parents | 200c2c3c5f67 |
children | 15d5b3961009 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python3 |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
2 """Create a random passphrase from a dictionary of words. BETA |
0 | 3 |
4 Relies on the entropy of python's | |
5 random.SystemRandom class | |
6 which (according to the documentation) calls os.urandom() | |
7 which (according to the documentation) calls the operating system | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
8 specific randomness source which "should be unpredictable |
0 | 9 enough for cryptographic applications" |
10 | |
11 Requires: | |
12 * Python v>=3.2 | |
13 * a dictionary, Ding's trans-de-en by default. | |
14 E.g. on a Debian/Ubuntu system in package "trans-de-en". | |
15 or from http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/ | |
16 | |
1
00ed7df30fe4
Checking for 8k entries now. Comment improvements.
Bernhard Reiter <bernhard@intevation.de>
parents:
0
diff
changeset
|
17 Uses a hardcoded filepath; the language is selected with -2/--second-language. |
0 | 18 Search for **customize** below to change it. |
19 | |
3
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
20 Related: There is a Go implementation started by Sascha L. Teichmann at |
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
21 https://bitbucket.org/s_l_teichmann/ppgen |
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
22 |
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
23 |
8 | 24 Copyright 2016, 2017, 2018 by Intevation GmbH. |
1
00ed7df30fe4
Checking for 8k entries now. Comment improvements.
Bernhard Reiter <bernhard@intevation.de>
parents:
0
diff
changeset
|
25 Author: Bernhard E. Reiter <bernhard@intevation.de> |
0 | 26 |
27 This file is Free Software under the Apache 2.0 license and thus | |
28 comes without any warranty (to the extent permissible under applicable law). | |
29 """ | |
30 | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
31 import argparse |
0 | 32 import math |
33 import re | |
34 import sys | |
35 | |
36 from random import SystemRandom | |
37 _srandom = SystemRandom() | |
38 | |
39 tainted = False # to be set if we find a hint that the passphrase may be weak | |
40 | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
41 |
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
def buildDictionary(options):
    """Build up a dictionary of unique words, calculate stats.

    options: parsed command line options; the attributes
        `second_language`, `ddump_filename` and `just_passphrase` are read.

    Returns the list of words delivered by readDingDict().

    Side effects: optionally dumps the words to `options.ddump_filename`,
    prints statistics unless `options.just_passphrase` is set, and sets the
    module level flag `tainted` if fewer than 8000 words were found.
    """
    global tainted

    # Using the dictionary from Ding **customize**
    d = readDingDict(options,
                     filename="/usr/share/trans/de-en",
                     useLeft=not options.second_language)

    # for debugging purposes, dump dictionary
    if options.ddump_filename:
        # keep stdout clean when only the passphrase shall be printed
        if not options.just_passphrase:
            print("Writing out dictionary in '{}'.".format(
                options.ddump_filename))
        with open(options.ddump_filename, "w") as f:
            f.writelines("{}\n".format(word) for word in d)

    # Print some stats on the dictionary to be used
    dl = len(d)
    if not options.just_passphrase:
        print("Found {:d} dictionary entries.".format(dl))
        # the logarithm is undefined for an empty dictionary, so the table
        # is only printed if there is at least one word
        if dl > 0:
            print("|= Number of words |= possibilities |")
            for i in range(1, 5):
                print("| {:2d} | 2^{:4.1f} |".format(
                    i, math.log(dl**i, 2)))

    if dl < 8000:
        sys.stderr.write("!Your dictionary is below 8k entries, "
                         "that is quite small!\n")
        tainted = True
    return d
79 | |
80 | |
def readDingDict(options, filename="/usr/share/trans/de-en", useLeft=False,
                 encoding="utf-8"):
    """Read dictionary with unique words from file in Ding format.

    options: parsed command line options; only `just_passphrase` is read,
        if it is set the progress output is suppressed
    filename: path of the dictionary file to read
    useLeft: Boolean to control which language to use
    encoding: text encoding used to decode the file; given explicitly so
        that the result does not depend on the locale of the caller
        (assumes the Ding file is UTF-8 encoded -- TODO confirm)

    Returns a list of the unique words found. The order is arbitrary
    because the words are collected in a set.

    TODO: add option to use both languages for people that speak them both?
    """

    dset = set()  # using the datatype 'set' to avoid duplicates

    splitter = re.compile(r"""\ \|\            # first pattern ' | '
                             |;\               # second pattern '; '
                             |(?<=\S)/(?=\S)   # 3.: '/' surrounded by chars
                             |\s+              # by whitespace
                             """, re.VERBOSE)

    show_progress = not options.just_passphrase

    if show_progress:
        print("Reading entries from {}.".format(filename), end='')
    counter = 0  # for progress or stopping early
    with open(filename, "r", encoding=encoding) as f:
        for line in f:
            if line.startswith('#'):
                continue

            # languages are separated by " :: "
            left, _, right = line.partition(" :: ")
            languageEntry = left if useLeft else right

            for word in splitter.split(languageEntry):
                word = word.strip('(",.)\'!:;<>').rstrip('/')
                # skip short words and annotations like '{n}' or '[ugs.]'
                if len(word) > 2 and word[0] not in '[{/':
                    dset.add(word)

            #TODO: check for very common words and remove them?

            counter += 1
            if show_progress and counter % 10000 == 0:
                print('.', end='')
                sys.stdout.flush()
    if show_progress:
        print()

    return list(dset)
126 | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
127 |
def main():
    """Parse the command line, draw random words and print the passphrase.

    The words are drawn with the module level `_srandom` generator from the
    list returned by buildDictionary().

    Exits with an error message if the dictionary is empty or if the
    module level flag `tainted` got set, i.e. when the dictionary is small
    or a word was drawn more than once.
    """
    global tainted

    parser = argparse.ArgumentParser(
        description=__doc__.splitlines()[0],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--number-of-words', type=int, default=4,
                        help='how many words to draw for the passphrase, '
                             'most useful with -j')
    parser.add_argument('-j', '--just-passphrase', action="store_true",
                        help='only output the passphrase on a single line')
    parser.add_argument('--ddump-filename',
                        help='filename to dump the dictionary to')
    parser.add_argument('-2', '--second-language', action='store_true',
                        help='use the second language in the ding dictionary')
    options = parser.parse_args()

    dictionary = buildDictionary(options)
    if not dictionary:
        # choice() would raise an IndexError on an empty sequence
        sys.exit("!!! No words found in the dictionary !!!")

    how_many = options.number_of_words
    drawn = [_srandom.choice(dictionary) for _ in range(how_many)]

    if options.just_passphrase:
        # join instead of appending a separator after each word, so the
        # line does not end with a stray space
        print(' '.join(drawn))
    else:
        print("\nGenerated passphrase with {}"
              " randomly selected words:\n".format(how_many))
        # one word per line, followed by an empty line
        print(" " + "\n ".join(drawn) + "\n")

    # rudimentary check: compare the lower cased words to detect duplicates
    if len({word.lower() for word in drawn}) < how_many:
        sys.stderr.write("! You've drawn a word more than once, this means:\n"
                         "! Your random generation is weak"
                         " or you are being very lucky.\n")
        tainted = True

    if tainted:
        sys.exit("!!! Don't use the resulting passphrase !!!")
0 | 175 |
# Only generate a passphrase when run as a script, not when imported.
if __name__ == "__main__":
    main()