#!/usr/bin/env python3
"""Create a random passphrase from a dictionary of words. BETA

Relies on the entropy of python's
  random.SystemRandom class
    which (according to the documentation) calls os.urandom()
      which (according to the documentation) calls the operating system
        specific randomness source which "should be unpredictable
        enough for cryptographic applications"

Requires:
 * Python v>=3.2
 * a dictionary, Ding's trans-de-en by default.
   E.g. on a Debian/Ubuntu system in package "trans-de-en".
   or from http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/

Reads Ding's dictionary from its Debian/Ubuntu default path unless
--dictionary-file names a different file; -2 selects the second language.

Related: There is a Go implementation started by Sascha L. Teichmann at
  https://bitbucket.org/s_l_teichmann/ppgen


Copyright 2016, 2017, 2018 by Intevation GmbH.
Author: Bernhard E. Reiter

This file is Free Software under the Apache 2.0 license and thus
comes without any warranty (to the extent permissible under applicable law).
"""

import argparse
import math
import re
import sys

from random import SystemRandom

_srandom = SystemRandom()

# Default location of Ding's de-en dictionary (Debian/Ubuntu "trans-de-en").
DEFAULT_DING_FILE = "/usr/share/trans/de-en"

# Dictionaries with fewer unique words than this are reported as too small.
MIN_DICTIONARY_SIZE = 8000

# Separators between words inside one language entry of a Ding line.
_WORD_SPLITTER = re.compile(r"""\ \|\             # first pattern ' | '
                                |;\               # second pattern '; '
                                |(?<=\S)/(?=\S)   # 3.: '/' surrounded by chars
                                |\s+              # by whitespace
                             """, re.VERBOSE)

tainted = False  # to be set if we find a hint that the passphrase may be weak


def buildDictionary(options):
    """Build up a list of unique words and report stats about it.

    options: parsed command line options; reads `second_language`,
        `ddump_filename`, `just_passphrase` and, if present,
        `dictionary_file` (falls back to DEFAULT_DING_FILE).

    Returns the list of words. Sets the module level `tainted` flag if the
    dictionary has fewer than MIN_DICTIONARY_SIZE entries.
    """
    global tainted

    # getattr keeps callers working that pass options without this attribute
    filename = getattr(options, "dictionary_file", None) or DEFAULT_DING_FILE
    d = readDingDict(options,
                     filename=filename,
                     useLeft=not options.second_language)

    # for debugging purposes, dump dictionary
    if options.ddump_filename:
        # -j promises that only the passphrase is written to stdout
        if not options.just_passphrase:
            print("Writing out dictionary in '{}'.".format(
                options.ddump_filename))
        with open(options.ddump_filename, "w", encoding="utf-8") as f:
            f.writelines("{}\n".format(word) for word in d)

    # Print some stats on the dictionary to be used
    dl = len(d)
    if not options.just_passphrase:
        print("Found {:d} dictionary entries.".format(dl))
        if dl > 0:  # log(0) is undefined, an empty dictionary has no stats
            bits_per_word = math.log(dl, 2)
            print("|= Number of words |= possibilities |")
            for i in range(1, 5):
                # log2(dl**i) == i * log2(dl), without building a huge int
                print("| {:2d} | 2^{:4.1f} |".format(i, i * bits_per_word))

    if dl < MIN_DICTIONARY_SIZE:
        sys.stderr.write("!Your dictionary is below 8k entries, "
                         "that is quite small!\n")
        tainted = True
    return d


def readDingDict(options, filename=DEFAULT_DING_FILE, useLeft=False):
    """Read dictionary with unique words from file in Ding format.

    options: parsed command line options; only `just_passphrase` is read,
        it suppresses the progress output on stdout.
    filename: path of the Ding dictionary, read as UTF-8.
    useLeft: Boolean to control which language to use, the one left
        (True) or right (False) of the " :: " separator.

    Returns a list of unique words in arbitrary order.

    TODO: add option to use both languages for people that speak them both?
    """
    show_progress = not options.just_passphrase
    dset = set()  # using the datatype 'set' to avoid duplicates

    if show_progress:
        print("Reading entries from {}.".format(filename), end='')

    counter = 0  # number of entry lines read, drives the progress dots
    # Explicit encoding: do not depend on the locale of the calling process.
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            if line.startswith('#'):
                continue

            # languages are separated by " :: "
            p = line.partition(" :: ")
            languageEntry = p[0] if useLeft else p[2]

            for word in _WORD_SPLITTER.split(languageEntry):
                word = word.strip('(",.)\'!:;<>').rstrip('/')
                # skip short fragments and annotations like {n}, [fig.], /abbr/
                if len(word) > 2 and word[0] not in '[{/':
                    dset.add(word)

            # TODO: check for very common words and remove them?

            counter += 1
            if show_progress and counter % 10000 == 0:
                print('.', end='')
                sys.stdout.flush()

    if show_progress:
        print()

    return list(dset)


def main():
    """Parse the command line, draw the words and print the passphrase.

    Exits with an error message if the dictionary contains no words or if
    a hint was found that the passphrase may be weak (see `tainted`).
    """
    global tainted

    parser = argparse.ArgumentParser(
        description=__doc__.splitlines()[0],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--number-of-words', type=int, default=4,
                        help='how many words to draw for the passphrase, '
                             'most useful with -j')
    parser.add_argument('-j', '--just-passphrase', action="store_true",
                        help='only output the passphrase on a single line')
    parser.add_argument('--ddump-filename',
                        help='filename to dump the dictionary to')
    parser.add_argument('--dictionary-file', default=DEFAULT_DING_FILE,
                        help='dictionary file in ding format to read')
    parser.add_argument('-2', '--second-language', action='store_true',
                        help='use the second language in the ding dictionary')
    options = parser.parse_args()

    how_many = options.number_of_words
    if how_many < 1:
        # zero or negative would silently produce an empty passphrase
        parser.error("number of words must be at least 1")

    dictionary = buildDictionary(options)
    if not dictionary:
        # nothing to choose from; random.choice() would raise IndexError
        sys.exit("!!! No words found in the dictionary !!!")

    chosen = [_srandom.choice(dictionary) for _ in range(how_many)]

    # join() puts separators only between the words, so the output
    # carries no trailing separator
    if options.just_passphrase:
        print(' '.join(chosen))
    else:
        print("\nGenerated passphrase with {}"
              " randomly selected words:\n".format(how_many))
        print(" " + '\n '.join(chosen))

    # rudimentary check: compare lower cased words to spot repeated draws
    if len(set(word.lower() for word in chosen)) < how_many:
        sys.stderr.write("! You've drawn a word more than once, this means:\n"
                         "! Your random generation is weak"
                         " or you are being very lucky.\n")
        tainted = True

    if tainted:
        sys.exit("!!! Don't use the resulting passphrase !!!")


if __name__ == "__main__":
    main()