Mercurial > ppgen
view ppgen.py @ 10:15d5b3961009 tip
Remove BETA tag and update links to Go and dict
author | Bernhard Reiter <bernhard@intevation.de> |
---|---|
date | Sun, 04 Oct 2020 14:55:46 +0200 |
parents | 35c468a37b54 |
children |
line wrap: on
line source
#!/usr/bin/env python3
"""Create a random passphrase from a dictionary of words.

Relies on the entropy of python's random.SystemRandom class
which (according to the documentation) calls os.urandom()
which (according to the documentation) calls the operating system
specific randomness source which
"should be unpredictable enough for cryptographic applications"

Requires:
 * Python v>=3.2
 * a dictionary, Ding's trans-de-en by default.
   E.g. on a Debian/Ubuntu system in package "trans-de-en".
   or from https://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/

Uses a hardcoded filepath and language.
Search for **customize** below to change it.

Related:
  There is a Go implementation by Sascha L. Teichmann at
  https://gitlab.com/sascha.l.teichmann/ppgen

Copyright 2016, 2017, 2018, 2019 by Intevation GmbH.
Author: Bernhard E. Reiter <bernhard@intevation.de>

This file is Free Software under the Apache 2.0 license
and thus comes without any warranty (to the extent permissible
under applicable law).
"""

import argparse
import math
import re
import sys
from random import SystemRandom

# Location of the Ding dictionary file that is used by default. **customize**
DEFAULT_DICTIONARY_PATH = "/usr/share/trans/de-en"

_srandom = SystemRandom()

tainted = False  # to be set if we find a hint that the passphrase may be weak


def buildDictionary(options, filename=DEFAULT_DICTIONARY_PATH):
    """Build up a dictionary of unique words, calculate stats.

    options: parsed command line options; the attributes read here are
        `second_language`, `ddump_filename` and `just_passphrase`.
    filename: path of the Ding format file to read the words from.

    Returns the list of unique words, which is empty if no usable word
    was found. Sets the module level flag `tainted` if the dictionary
    has fewer than 8000 entries.
    """
    global tainted

    # Small dictionaries for testing, assign one of them to `d` instead of
    # reading the Ding file:
    #   d = ["abc", "aBc", "cde", "efg", "hij", "blubber",
    #        "jikf", "zug", "lmf", "opq"]
    # second test dictionary to show that different string functions
    # are used:
    #   d = [''.join('A' * 1000) for _ in range(1000)]

    # Using the dictionary from Ding
    d = readDingDict(options, filename=filename,
                     useLeft=not options.second_language)

    # for debugging purposes, dump dictionary
    if options.ddump_filename:
        print("Writing out dictionary in '{}'.".format(options.ddump_filename))
        # written as UTF-8 to match how the dictionary itself is read
        with open(options.ddump_filename, "w", encoding="utf-8") as f:
            f.writelines("{}\n".format(word) for word in d)

    # Print some stats on the dictionary to be used
    dl = len(d)
    if not options.just_passphrase:
        print("Found {:d} dictionary entries.".format(dl))
        # log(0) is undefined, so an empty dictionary gets no table
        if dl > 0:
            print("|= Number of words |= possibilities |")
            for i in range(1, 5):
                print("| {:2d} | 2^{:4.1f} |".format(
                    i, math.log(dl**i, 2)))

    if dl < 8000:
        sys.stderr.write("!Your dictionary is below 8k entries, "
                         "that is quite small!\n")
        tainted = True

    return d


def readDingDict(options, filename=DEFAULT_DICTIONARY_PATH, useLeft=False):
    """Read dictionary with unique words from file in Ding format.

    options: parsed command line options; only `just_passphrase` is read,
        it suppresses the progress output on stdout.
    filename: path of the file to read.
    useLeft: Boolean to control which language to use. True takes the
        text before the first " :: " of each line, False the text after.

    Returns a list of the unique words in no particular order. Kept are
    words with more than two characters that do not start with one of
    the characters '[', '{' or '/'.

    TODO: add option to use both languages for people that speak
          them both?
    """
    dset = set()  # using the datatype 'set' to avoid duplicates

    splitter = re.compile(r"""\ \|\      # first pattern ' | '
                              |;\        # second pattern '; '
                              |(?<=\S)/(?=\S)  # 3.: '/' surrounded by chars
                              |\s+       # by whitespace
                           """, re.VERBOSE)

    verbose = not options.just_passphrase
    if verbose:
        print("Reading entries from {}.".format(filename), end='')

    counter = 0  # for progress or stopping early

    # Explicit encoding so that the result does not depend on the locale.
    # NOTE(review): assumes the Ding file is UTF-8 encoded, confirm for
    # dictionaries from other sources.
    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            if line.startswith('#'):
                continue

            # languages are separated by " :: "
            left, _, right = line.partition(" :: ")
            languageEntry = left if useLeft else right

            for word in splitter.split(languageEntry):
                word = word.strip('(",.)\'!:;<>').rstrip('/')
                if len(word) > 2 and word[0] not in '[{/':
                    dset.add(word)
                    # TODO: check for very common words and remove them?

            counter += 1
            ## stop early when debugging
            #if counter > 10: break

            if verbose and counter % 10000 == 0:
                print('.', end='')
                sys.stdout.flush()

    if verbose:
        print()

    return list(dset)


def main():
    """Parse the command line, draw the words and print the passphrase.

    Exits with an error message if the requested number of words is below
    one, if the dictionary has no usable words, or if a hint was found
    that the resulting passphrase may be weak.
    """
    global tainted

    # __doc__ is None when the interpreter runs with -OO
    description = (__doc__ or "Create a random passphrase.").splitlines()[0]

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--number-of-words', type=int, default=4,
                        help='how many words to draw for the passphrase, '
                             'most useful with -j')
    parser.add_argument('-j', '--just-passphrase', action="store_true",
                        help='only output the passphrase on a single line')
    parser.add_argument('--ddump-filename',
                        help='filename to dump the dictionary to')
    parser.add_argument('-2', '--second-language', action='store_true',
                        help='use the second language in the ding dictionary')

    options = parser.parse_args()

    how_many = options.number_of_words
    if how_many < 1:
        # an empty passphrase must never be reported as a good result
        parser.error("--number-of-words must be at least 1")

    dictionary = buildDictionary(options)
    if not dictionary:
        sys.exit("!!! Found no usable words in the dictionary !!!")

    drawn = [_srandom.choice(dictionary) for _ in range(how_many)]

    if options.just_passphrase:
        # joined, so that there is no separator after the last word
        print(' '.join(drawn))
    else:
        print("\nGenerated passphrase with {}"
              " randomly selected words:\n".format(how_many))
        for word in drawn:
            print(" " + word)
        print()

    # rudimentary check: compare the lower cased words to find duplicates
    if len({word.lower() for word in drawn}) < how_many:
        sys.stderr.write("! You've drawn a word more than once, this means:\n"
                         "! Your random generation is weak"
                         " or you are being very lucky.\n")
        tainted = True

    if tainted:
        sys.exit("!!! Don't use the resulting passphrase !!!")


if __name__ == "__main__":
    main()