#!/usr/bin/env python3
"""Create a passphrase from a few random words. DRAFT

Relies on the entropy of python's
  random.SystemRandom class
  which (according to the documentation) calls os.urandom()
  which (according to the documentation) calls the operating system
  specific randomness source which "should be unpredictable
  enough for cryptographic applications"

Requires:
 * Python v>=3.2
 * a dictionary, Ding's trans-de-en by default.
   E.g. on a Debian/Ubuntu system in package "trans-de-en".
   or from http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/

Uses a hardcoded file path and selected language.
Search for **customize** below to change it.

Copyright 2016 by Intevation GmbH.
Author: 2016-01-21 Bernhard E. Reiter

This file is Free Software under the Apache 2.0 license and thus
comes without any warranty (to the extent permissible under applicable law).
"""

import math
import re
import sys

from random import SystemRandom

_srandom = SystemRandom()

tainted = False  # to be set if we find a hint that the passphrase may be weak


def buildDictionary():
    """Build up a dictionary of unique words, calculate stats.

    Reads the word list via readDingDict(), prints the number of entries
    and a small table with the number of possible passphrases (as a power
    of two) for 1 to 4 words.

    Side effect: sets the module level flag `tainted` if the dictionary
    has fewer than 2000 entries.

    Returns the list of words.
    """
    global tainted

    # dictionary for testing
    #d = ["abc", "aBc", "cde", "efg", "hij", "blubber", "jikf", "zug", "lmf", "opq"]

    # Using the dictionary from Ding **customize**
    d = readDingDict(filename="/usr/share/trans/de-en", useLeft=True)

    ## for debugging purposes, dump dictionary
    #dumpfilename = "ddump.txt"
    #print("Writing out {}.".format(dumpfilename))
    #with open(dumpfilename, "w") as f:
    #    for i in d:
    #        f.write("{}\n".format(i))

    # Print some stats on the dictionary to be used
    dl = len(d)
    print("Found {:d} dictionary entries".format(dl))
    if dl < 2000:
        print("!Your dictionary is below 2k entries, that is quite small!")
        tainted = True

    # math.log(0, 2) raises ValueError, so skip the table for an empty
    # dictionary (it is already flagged as tainted above).
    if dl > 0:
        print("|= Number of words |= possibilities |")
        for i in range(1, 5):
            print("| {:2d} | 2^{:4.1f} |".format(
                i, math.log(dl**i, 2)))
    return d


def readDingDict(filename="/usr/share/trans/de-en", useLeft=False,
                 encoding="utf-8"):
    """Read dictionary with unique words from file in Ding format.

    filename: path of the Ding dictionary file
    useLeft: Boolean to control which language to use; True takes the
        text left of the " :: " separator, False the text right of it
    encoding: text encoding used to decode the file. Defaults to UTF-8 so
        that the result does not depend on the locale of the calling
        process; pass None to use the locale's preferred encoding.

    Lines starting with '#' are skipped. Each entry is split into words,
    surrounding punctuation is stripped and only words longer than two
    characters that do not start with '[', '{' or '/' are kept.

    Returns a list of unique words (in arbitrary order).

    TODO: add option to use both languages for people that speak them both?
    """

    dset = set()  # using the datatype 'set' to avoid duplicates

    splitter = re.compile(r"""\ \|\     # first pattern ' | '
                             |;\        # second pattern '; '
                             |(?<=\S)/(?=\S)  # third: '/' surrounded by chars
                             |\s+       # fourth: any run of whitespace
                          """, re.VERBOSE)

    print("Reading entries from {}.".format(filename), end='')
    counter = 0  # for progress or stopping early
    with open(filename, "r", encoding=encoding) as f:
        for line in f:
            if line.startswith('#'):
                continue

            # languages are separated by " :: "
            p = line.partition(" :: ")
            languageEntry = p[0] if useLeft else p[2]

            for word in splitter.split(languageEntry):
                word = word.strip('(",.)\'!:;').rstrip('/')
                if len(word) > 2 and word[0] not in '[{/':
                    dset.add(word)

            #TODO: check for very common words and remove them?

            counter += 1
            ## stop early when debugging
            #if counter > 10: break
            if not counter % 10000:
                # one progress dot per 10000 processed entries
                print('.', end='')
                sys.stdout.flush()
    print()

    return list(dset)


def _hasRepeatedWord(words):
    """Return True if a word occurs more than once in words, ignoring case."""
    return len(set(word.lower() for word in words)) < len(words)


def main():
    """Print a passphrase of randomly selected dictionary words.

    Builds the dictionary, draws the words with SystemRandom and prints
    them one per line. Warns if the passphrase should not be used, because
    the dictionary is small or a word was drawn more than once.

    Exits with an error message if the dictionary contains no words.
    """
    global tainted
    dictionary = buildDictionary()
    if not dictionary:
        # _srandom.choice() would fail with an IndexError on an empty list
        sys.exit("! No words found in the dictionary, "
                 "cannot create a passphrase.")

    howMany = 4

    print("\nGenerated passphrase with {} randomly selected words:\n".format(
        howMany))
    print(" ", end='')
    chosen = []
    for _ in range(howMany):
        word = _srandom.choice(dictionary)
        chosen.append(word)
        print(word, end='\n ')
    print("\n")

    # compare the lower case words as a simple check if
    # our random source is okay
    if _hasRepeatedWord(chosen):
        print("! Your random generator is weak")
        print("! or you are being very lucky.")
        tainted = True

    if tainted:
        print("!!! Don't use the resulting passphrase !!!")


if __name__ == "__main__":
    main()