bh@44: # Copyright (C) 2007 by Intevation GmbH
bh@44: # Authors:
bh@44: # Bernhard Herzog <bh@intevation.de>
bh@44: #
bh@44: # This program is free software under the GPL (>=v2)
bh@44: # Read the file COPYING coming with the software for details.
bh@44: 
bh@44: """Shell like string splitting and expansion"""
bh@44: 
bh@44: import re
bh@44: import shlex
bh@44: 
bh@44: 
# Helper for the other regular expressions: matches a Python identifier.
match_identifier = "[_a-zA-Z][_a-zA-Z0-9]*"

# Regular expression used for word expansion.  It matches a dollar sign
# followed by exactly one of these:
#  a) another dollar sign or an at-sign (for quoting these characters)
#  b) a Python identifier
#  c) a Python identifier enclosed in braces
#  d) nothing (the empty "invalid" group), which indicates invalid use
#     of the dollar sign
rx_word_expansion = re.compile(r"\$((?P<delim>[$@])"
                               r"|(?P<named>%(identifier)s)"
                               r"|\{(?P<braced>%(identifier)s)\}"
                               r"|(?P<invalid>))"
                               % dict(identifier=match_identifier))

# Regular expression matching an entire word that has to be list
# expanded.  The regex matches if the word starts with an at-sign that
# is followed by at least one more character.  The part of the word that
# follows the at-sign either matches an identifier (named group "named")
# or anything else (named group "invalid"), which indicates invalid use
# of the at-sign.
rx_list_expansion = re.compile(r"^@((?P<named>%(identifier)s)|(?P<invalid>.+))$"
                               % dict(identifier=match_identifier))

# Match an unquoted at-sign that is not the first character of the word
# (an at-sign at the very start is handled by rx_list_expansion).  An
# at-sign is quoted only if the run of dollar signs directly in front of
# it has odd length: the dollars pair up as '$$' from the left and the
# last one forms '$@'.  With an even, non-empty run (e.g. 'a$$@b') every
# dollar belongs to a '$$' pair and the at-sign is unquoted.
rx_unquoted_at = re.compile(r"(?:[^$](?:\$\$)*|^(?:\$\$)+)@")
bh@44: 
def expandword(word, mapping):
    """Return word with all '$' references expanded.

    '$$' and '$@' are replaced by '$' and '@' respectively.
    '$identifier' and '${identifier}' are replaced by
    str(mapping[identifier]).

    Raises ValueError if a '$' is followed by anything else.  A lookup
    failure in mapping (KeyError for a dict) is propagated unchanged.
    """
    def replacement(match):
        key = match.group("named") or match.group("braced")
        if key:
            return str(mapping[key])

        delim = match.group("delim")
        if delim:
            return delim

        # Neither a variable reference nor a quoted character matched,
        # so only the empty "invalid" alternative of the regex is left.
        raise ValueError("%r contains an invalid use of '$' at position %d"
                         % (word, match.start()))

    return rx_word_expansion.sub(replacement, word)
bh@44: 
def cmdexpand(string, **kw):
    """Split string into words and expand the variable references in them.

    The string is split with shlex.split.  Every resulting word then
    undergoes either word expansion or list expansion.  The variables
    available for expansion are the keyword arguments.

    Word expansion resembles the Template class of Python's string
    module:

      '$$' becomes '$'

      '$@' becomes '@'

      '$identifier' becomes the value of the variable called identifier,
      converted with str.  The identifier follows the syntax of Python
      identifiers and ends at the first character that cannot be part of
      one.

      '${identifier}' is equivalent to '$identifier'; the braces delimit
      the identifier when the following characters would otherwise be
      taken as part of it.

    An expanded word always stays a single word, even if shlex would
    split the expanded text into several words.

    List expansion applies to words that consist of nothing but a '@'
    followed by an identifier.  The variable must be a sequence; the
    word is replaced by the elements of the sequence, each converted
    with str.

    A ValueError is raised for a word that contains an unquoted '@' or
    that starts with '@' followed by something that is not an
    identifier.

    Examples:

      >>> from cmdexpand import cmdexpand
      >>> cmdexpand("ssh $user$@$host", user="john", host="python")
      ['ssh', 'john@python']

      >>> cmdexpand("scp @files $user$@$host:$remotedir", user="john",
      ...           host="python", files=["main.py", "cmdexpand.py"],
      ...           remotedir="/home/john/files")
      ['scp', 'main.py', 'cmdexpand.py', 'john@python:/home/john/files']
    """
    # The words are processed from last to first, so if several words
    # are faulty the error for the last one is reported.  Each word
    # yields a chunk of one or more result words.
    chunks = []
    for word in reversed(shlex.split(string)):
        if rx_unquoted_at.search(word):
            raise ValueError("%r contains an unquoted '@'" % word)

        list_match = rx_list_expansion.match(word)
        if not list_match:
            chunks.append([expandword(word, kw)])
            continue

        name = list_match.group("named")
        if not name:
            assert list_match.group("invalid") is not None
            raise ValueError("In %r the characters after the '@'"
                             " do not match a python identifier" % word)
        chunks.append([str(element) for element in kw[name]])

    # Restore the original word order and flatten the chunks.
    chunks.reverse()
    return [expanded for chunk in chunks for expanded in chunk]