#!/usr/bin/env python3
"""Create a random passphrase from a dictionary of words. BETA

Relies on the entropy of python's
  random.SystemRandom class
  which (according to the documentation) calls os.urandom()
  which (according to the documentation) calls the operating system
  specific randomness source which "should be unpredictable
  enough for cryptographic applications"

Requires:
 * Python v>=3.2
 * a dictionary, Ding's trans-de-en by default.
   E.g. on a Debian/Ubuntu system in package "trans-de-en".
   or from http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/

Uses a hardcoded filepath and language.
Search for **customize** below to change it.

Related: There is a Go implementation started by Sascha L. Teichmann at
  https://bitbucket.org/s_l_teichmann/ppgen


Copyright 2016, 2017 by Intevation GmbH.
Author: Bernhard E. Reiter

This file is Free Software under the Apache 2.0 license and thus
comes without any warranty (to the extent permissible under applicable law).
"""

import argparse
import math
import re
import sys

from random import SystemRandom
_srandom = SystemRandom()

tainted = False  # to be set if we find a hint that the passphrase may be weak


def buildDictionary(options, filename="/usr/share/trans/de-en", useLeft=True):
    """Build up a dictionary of unique words, calculate stats.

    options: parsed command line options; if 'options.ddump_filename'
        is set, the word list is written to that file (one word per line).
    filename: path of the dictionary in Ding format **customize**
    useLeft: Boolean to control which language to use **customize**

    Returns the list of unique words (possibly empty).
    Sets the global 'tainted' if fewer than 8000 words were found.
    """
    global tainted

    # dictionary for testing
    #d = ["abc", "aBc", "cde", "efg", "hij", "blubber",
    #     "jikf", "zug", "lmf", "opq"]
    # second test dictionary to show that different string functions are used.
    #d = [''.join('A' * 1000) for _ in range(1000)]

    # Using the dictionary from Ding
    d = readDingDict(filename=filename, useLeft=useLeft)

    ## for debugging purposes, dump dictionary
    if options.ddump_filename:
        print("Writing out dictionary in '{}'.".format(options.ddump_filename))
        # explicit encoding, so that words with umlauts can be written
        # regardless of the locale
        with open(options.ddump_filename, "w", encoding="utf-8") as f:
            for entry in d:
                f.write("{}\n".format(entry))

    # Print some stats on the dictionary to be used
    dl = len(d)
    print("Found {:d} dictionary entries.".format(dl))
    if dl < 8000:
        print("!Your dictionary is below 8k entries, that is quite small!")
        tainted = True

    # the logarithm of 0 is undefined, so there are no stats to show
    # for an empty dictionary
    if dl > 0:
        print("|= Number of words |= possibilities |")
        for i in range(1, 5):
            print("| {:2d} | 2^{:4.1f} |".format(
                i, math.log(dl**i, 2)))
    return d


def readDingDict(filename="/usr/share/trans/de-en", useLeft=False):
    """Read dictionary with unique words from file in Ding format.

    filename: path of the dictionary file, it is read as UTF-8
    useLeft: Boolean to control which language to use

    Returns a list of unique words in arbitrary order. Only words with
    more than two characters that do not start with '[', '{' or '/'
    are kept.

    TODO: add option to use both languages for people that speak them both?
    """

    dset = set()  # using the datatype 'set' to avoid duplicates

    splitter = re.compile(r"""\ \|\             # first pattern ' | '
                              |;\               # second pattern '; '
                              |(?<=\S)/(?=\S)   # 3.: '/' surrounded by chars
                              |\s+              # by whitespace
                           """, re.VERBOSE)

    print("Reading entries from {}.".format(filename), end='')
    counter = 0  # for progress or stopping early
    # explicit encoding, because the default depends on the locale
    # and e.g. LANG=C would fail on the umlauts
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            if line.startswith('#'):
                continue

            # languages are separated by " :: "
            p = line.partition(" :: ")
            languageEntry = p[0] if useLeft else p[2]

            for word in splitter.split(languageEntry):
                word = word.strip('(",.)\'!:;<>').rstrip('/')
                if len(word) > 2 and word[0] not in '[{/':
                    dset.add(word)

            #TODO: check for very common words and remove them?

            counter += 1
            ## stop early when debugging
            #if counter > 10: break
            if not counter % 10000:
                print('.', end='')
                sys.stdout.flush()
    print()

    return list(dset)


def main():
    """Parse the command line, then print a passphrase and warnings."""
    global tainted

    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument('--ddump-filename',
                        help='filename to dump the dictionary to')
    options = parser.parse_args()

    dictionary = buildDictionary(options)
    if not dictionary:
        # choosing from an empty sequence would raise an IndexError
        sys.exit("! No usable words found in the dictionary, "
                 "cannot generate a passphrase.")

    howMany = 4

    # use a set of lower cased words for a simple check if
    # our random source is okay
    print("\nGenerated passphrase with {} randomly selected words:\n".format(
        howMany))
    print(" ", end='')
    words = set()
    for _ in range(howMany):
        word = _srandom.choice(dictionary)
        words.add(word.lower())
        print(word, end='\n ')
    print("\n")

    if len(words) < howMany:
        print("! Your random generator is weak")
        print("! or you are being very lucky.")
        tainted = True

    if tainted:
        print("!!! Don't use the resulting passphrase !!!")


if __name__ == "__main__":
    main()