Mercurial > ppgen
annotate ppgen.py @ 6:81f75c9aac84
Cleanup, minor: improves Comments. Bumps copyright.
author | Bernhard Reiter <bernhard@intevation.de> |
---|---|
date | Mon, 13 Feb 2017 08:38:06 +0100 |
parents | f8e24b2b6b6a |
children | 8b2f8f439817 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python3 |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
2 """Create a random passphrase from a dictionary of words. BETA |
0 | 3 |
4 Relies on the entropy of python's | |
5 random.SystemRandom class | |
6 which (according to the documentation) calls os.urandom() | |
7 which (according to the documentation) calls the operating system | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
8 specific randomness source which "should be unpredictable |
0 | 9 enough for cryptographic applications" |
10 | |
11 Requires: | |
12 * Python v>=3.2 | |
13 * a dictionary, Ding's trans-de-en by default. | |
14 E.g. on a Debian/Ubuntu system in package "trans-de-en". | |
15 or from http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/ | |
16 | |
1
00ed7df30fe4
Checking for 8k entries now. Comment improvements.
Bernhard Reiter <bernhard@intevation.de>
parents:
0
diff
changeset
|
17 Uses a hardcoded filepath and language. |
0 | 18 Search for **customize** below to change it. |
19 | |
3
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
20 Related: There is a Go implementation started by Sascha L. Teichmann at |
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
21 https://bitbucket.org/s_l_teichmann/ppgen |
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
22 |
757625ec8364
Comment added hint about SLT's Go implementation.
Bernhard Reiter <bernhard@intevation.de>
parents:
2
diff
changeset
|
23 |
6
81f75c9aac84
Cleanup, minor: improves Comments. Bumps copyright.
Bernhard Reiter <bernhard@intevation.de>
parents:
5
diff
changeset
|
24 Copyright 2016, 2017 by Intevation GmbH. |
1
00ed7df30fe4
Checking for 8k entries now. Comment improvements.
Bernhard Reiter <bernhard@intevation.de>
parents:
0
diff
changeset
|
25 Author: Bernhard E. Reiter <bernhard@intevation.de> |
0 | 26 |
27 This file is Free Software under the Apache 2.0 license and thus | |
28 comes without any warranty (to the extent permissible under applicable law). | |
29 """ | |
30 | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
31 import argparse |
0 | 32 import math |
33 import re | |
34 import sys | |
35 | |
36 from random import SystemRandom | |
37 _srandom = SystemRandom() | |
38 | |
39 tainted = False # to be set if we find a hint that the passphrase may be weak | |
40 | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
41 |
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
def buildDictionary(options):
    """Build up a dictionary of unique words, calculate stats.

    options: parsed command line options. Only the attribute
        `ddump_filename` is read; if it is set, the word list is written
        to that file, one word per line.

    Returns the list of words as delivered by readDingDict().

    Side effects: prints the number of entries and a small table with
    the number of possible passphrases for 1 to 4 words to stdout.
    Sets the module level flag `tainted` if fewer than 8000 entries
    were found.
    """
    global tainted

    # dictionary for testing
    #d = ["abc", "aBc", "cde", "efg", "hij", "blubber",
    #     "jikf", "zug", "lmf", "opq"]
    # second test dictionary to show that different string functions are used.
    #d = [''.join('A' * 1000) for _ in range(1000)]

    # Using the dictionary from Ding **customize**
    d = readDingDict(filename="/usr/share/trans/de-en", useLeft=True)

    ## for debugging purposes, dump dictionary
    if options.ddump_filename:
        print("Writing out dictionary in '{}'.".format(options.ddump_filename))
        with open(options.ddump_filename, "w") as f:
            for i in d:
                f.write("{}\n".format(i))

    # Print some stats on the dictionary to be used
    dl = len(d)
    print("Found {:d} dictionary entries.".format(dl))
    if dl < 8000:
        print("!Your dictionary is below 8k entries, that is quite small!")
        tainted = True

    # math.log() raises ValueError for 0, so an empty dictionary gets no
    # table instead of an obscure "math domain error".
    if dl > 0:
        print("|= Number of words |= possibilities |")
        for i in range(1, 5):
            print("| {:2d} | 2^{:4.1f} |".format(
                i, math.log(dl**i, 2)))
    return d
75 | |
76 | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
def readDingDict(filename="/usr/share/trans/de-en", useLeft=False):
    """Read dictionary with unique words from file in Ding format.

    filename: path of the dictionary file. It is decoded as UTF-8
        (NOTE(review): assumes the Ding file is UTF-8 encoded -- confirm
        for the dictionary version in use).
    useLeft: Boolean to control which language to use. If true, the text
        left of the " :: " separator is used, otherwise the text right
        of it.

    Returns a list with the unique words found, in no particular order.
    Lines starting with '#' are skipped. Words shorter than three
    characters or starting with one of '[', '{', '/' are dropped.
    Progress is printed to stdout.

    TODO: add option to use both languages for people that speak them both?
    """

    dset = set()  # using the datatype 'set' to avoid duplicates

    splitter = re.compile(r"""\ \|\              # first pattern ' | '
                              |;\                # second pattern '; '
                              |(?<=\S)/(?=\S)    # 3.: '/' surrounded by chars
                              |\s+               # by whitespace
                           """, re.VERBOSE)

    print("Reading entries from {}.".format(filename), end='')
    counter = 0  # for progress or stopping early
    # Explicit encoding: without it the locale's encoding is used and
    # non-ASCII words break under e.g. the C locale.
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            if line.startswith('#'):
                continue

            # languages are separated by " :: "
            p = line.partition(" :: ")
            languageEntry = p[0] if useLeft else p[2]

            for word in splitter.split(languageEntry):
                word = word.strip('(",.)\'!:;').rstrip('/')
                if len(word) > 2 and word[0] not in '[{/':
                    dset.add(word)

                    #TODO: check for very common words and remove them?

            counter += 1
            ## stop early when debugging
            #if counter > 10: break
            if not counter % 10000:
                print('.', end='')
                sys.stdout.flush()
    print()

    return list(dset)
120 | |
4
85c65a597420
Improves: command line options and code style.
Bernhard Reiter <bernhard@intevation.de>
parents:
3
diff
changeset
|
121 |
def main():
    """Parse the command line, build the dictionary, print a passphrase.

    Draws four words from the dictionary with the module level
    SystemRandom instance and prints them, one per line. Sets the module
    level flag `tainted` and prints a warning if the same word (compared
    case-insensitively) was drawn more than once. Exits with an error
    message if the dictionary is empty.
    """
    global tainted

    # __doc__ is None when docstrings are stripped (python -OO)
    description = __doc__.splitlines()[0] if __doc__ else None
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--ddump-filename',
                        help='filename to dump the dictionary to')
    options = parser.parse_args()

    dictionary = buildDictionary(options)
    if not dictionary:
        # choice() on an empty sequence would raise a bare IndexError
        sys.exit("! No dictionary entries found, cannot create a passphrase.")

    howMany = 4

    # Collect the lower cased picks in a set for a simple check if
    # our random source is okay: fewer distinct entries than draws
    # means that a word came up more than once.
    print("\nGenerated passphrase with {} randomly selected words:\n".format(
        howMany))
    print(" ", end='')
    words = set()
    for _ in range(howMany):
        word = _srandom.choice(dictionary)
        words.add(word.lower())
        print(word, end='\n ')
    print("\n")

    if len(words) < howMany:
        print("! Your random generator is weak")
        print("! or you are being very lucky.")
        tainted = True

    if tainted:
        print("!!! Don't use the resulting passphrase !!!")
154 if __name__ == "__main__": | |
155 main() |