bernhard@0: #!/usr/bin/env python3
bernhard@4: """Create a random passphrase from a dictionary of words. BETA
bernhard@0: 
bernhard@0: Relies on the entropy of python's
bernhard@0:     random.SystemRandom class
bernhard@0:         which (according to the documentation) calls os.urandom()
bernhard@0:         which (according to the documentation) calls the operating system
bernhard@4:            specific randomness source which "should be unpredictable
bernhard@0:             enough for cryptographic applications"
bernhard@0: 
bernhard@0: Requires:
bernhard@0:   * Python v>=3.2
bernhard@0:   * a dictionary, Ding's trans-de-en by default.
bernhard@0:     E.g. on a Debian/Ubuntu system in package "trans-de-en".
bernhard@0:     or from http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/
bernhard@0: 
bernhard@1: Uses a hardcoded filepath and language.
bernhard@0: Search for **customize** below to change it.
bernhard@0: 
bernhard@3: Related: There is a Go implementation started by Sascha L. Teichmann at
bernhard@3:   https://bitbucket.org/s_l_teichmann/ppgen
bernhard@3: 
bernhard@3: 
bernhard@6: Copyright 2016, 2017 by Intevation GmbH.
bernhard@1: Author: Bernhard E. Reiter <bernhard@intevation.de>
bernhard@0: 
bernhard@0: This file is Free Software under the Apache 2.0 license and thus
bernhard@0: comes without any warranty (to the extent permissible under applicable law).
bernhard@0: """
bernhard@0: 
bernhard@4: import argparse
bernhard@0: import math
bernhard@0: import re
bernhard@0: import sys
bernhard@0: 
bernhard@0: from random import SystemRandom
# Random source used for picking the words; see the module docstring for
# the chain of randomness sources it relies on.
_srandom = SystemRandom()

# Module-wide flag: starts as False and is to be set to True as soon as we
# find a hint that the generated passphrase may be weak.
tainted = False
bernhard@0: 
bernhard@4: 
def buildDictionary(options):
    """Build up a dictionary of unique words, calculate stats.

    options: parsed command line options. Only the attribute
             'ddump_filename' is read; if it is set, the word list is
             written to that file, one word per line (UTF-8 encoded).

    Prints the number of entries found and a table with the number of
    possibilities for passphrases of one to four words.
    Sets the global 'tainted' if the dictionary has less than 8000 entries.

    Returns the list of words.
    Raises ValueError if no words were found at all.
    """
    global tainted

    # dictionary for testing
    #d = ["abc", "aBc", "cde", "efg", "hij", "blubber",
    #      "jikf", "zug", "lmf", "opq"]
    # second test dictionary to show that different string functions are used.
    #d =  [''.join('A' * 1000) for _ in range(1000)]

    # Using the dictionary from Ding **customize**
    d = readDingDict(filename="/usr/share/trans/de-en", useLeft=True)

    ## for debugging purposes, dump dictionary
    if options.ddump_filename:
        print("Writing out dictionary in '{}'.".format(options.ddump_filename))
        # Explicit encoding: the words contain non-ASCII characters and the
        # locale's default encoding may not be able to represent them.
        with open(options.ddump_filename, "w", encoding="utf-8") as f:
            f.writelines("{}\n".format(word) for word in d)

    # Print some stats on the dictionary to be used
    dl = len(d)
    print("Found {:d} dictionary entries.".format(dl))
    if dl < 8000:
        print("!Your dictionary is below 8k entries, that is quite small!")
        tainted = True

    if dl == 0:
        # log(0) is undefined and there is nothing to choose words from,
        # so fail with a clear message instead of a "math domain error".
        raise ValueError("No usable words found in the dictionary, "
                         "cannot create a passphrase.")

    # Each randomly chosen word contributes log2(dl) bits; i words give
    # i times as much. This avoids building the huge integer dl**i.
    bitsPerWord = math.log(dl, 2)

    print("|= Number of words |= possibilities |")
    for i in range(1, 5):
        print("|               {:2d} |    2^{:4.1f}      |".format(
              i, i * bitsPerWord))
    return d
bernhard@0: 
bernhard@0: 
def readDingDict(filename="/usr/share/trans/de-en", useLeft=False,
                 encoding="utf-8"):
    """Read dictionary with unique words from file in Ding format.

    filename: path of the dictionary file to read
    useLeft: Boolean to control which language to use. True takes the
             part of each line left of the " :: " separator,
             False the part right of it.
    encoding: text encoding used to decode the file. It is passed
             explicitly so that reading does not depend on the locale
             of the calling environment. NOTE(review): UTF-8 is assumed
             to match the Ding dictionary files in use; pass a different
             value for files in a legacy encoding.

    Lines starting with '#' are skipped. Each entry is split into words,
    surrounding punctuation is stripped, and only words longer than two
    characters that do not start with '[', '{' or '/' are kept.
    Progress is printed to stdout (one dot per 10000 lines).

    Returns a list of the unique words, in no particular order.

    TODO: add option to use both languages for people that speak them both?
    """

    dset = set()  # using the datatype 'set' to avoid duplicates

    splitter = re.compile(r"""\ \|\  # first pattern  ' | '
                           |;\       # second pattern '; '
                           |(?<=\S)/(?=\S)  # 3.:    '/' surrounded by chars
                           |\s+      # by whitespace
                           """, re.VERBOSE)

    print("Reading entries from {}.".format(filename), end='')
    counter = 0  # for progress or stopping early
    with open(filename, "r", encoding=encoding) as f:
        for line in f:
            if line.startswith('#'):
                continue

            # languages are separated by " :: "
            left, _, right = line.partition(" :: ")
            languageEntry = left if useLeft else right

            for word in splitter.split(languageEntry):
                word = word.strip('(",.)\'!:;<>').rstrip('/')
                # drop short words and annotations like '{n}' or '[bot.]'
                if len(word) > 2 and word[0] not in '[{/':
                    dset.add(word)

            #TODO: check for very common words and remove them?

            counter += 1
            ## stop early when debugging
            #if counter > 10: break
            if not counter % 10000:
                print('.', end='')
                sys.stdout.flush()
        print()  # terminate the progress line

    return list(dset)
bernhard@0: 
bernhard@4: 
def main():
    """Command line entry point: print a passphrase of four random words.

    Parses the command line (the only option is --ddump-filename), builds
    the word list with buildDictionary() and prints four words drawn from
    it with _srandom.choice(). If two of the drawn words are equal when
    lower cased, the global 'tainted' is set. A final warning is printed
    whenever 'tainted' is set.
    """
    global tainted

    argParser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    argParser.add_argument('--ddump-filename',
                           help='filename to dump the dictionary to')
    wordList = buildDictionary(argParser.parse_args())

    howMany = 4

    print("\nGenerated passphrase with {} randomly selected words:\n".format(
          howMany))
    print("    ", end='')

    # Remember the lower cased form of every drawn word; the set collapses
    # repeated words, which gives a simple check if our random source is okay.
    seenLower = set()
    for _ in range(howMany):
        pick = _srandom.choice(wordList)
        seenLower.add(pick.lower())
        print(pick, end='\n    ')
    print("\n")

    # Fewer distinct words than draws means at least one word came up twice.
    if howMany > len(seenLower):
        print("! Your random generator is weak")
        print("! or you are being very lucky.")
        tainted = True

    if tainted:
        print("!!! Don't use the resulting passphrase !!!")
bernhard@0: 
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()