diff options
Diffstat (limited to 'python/mozbuild/mozbuild/shellutil.py')
-rw-r--r-- | python/mozbuild/mozbuild/shellutil.py | 210 |
1 files changed, 210 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import re


def _tokens2re(**tokens):
    """Compile a regexp matching any of the given named, non-escaped tokens.

    Each keyword argument is a ``name=pattern`` pair. The returned pattern
    has the form::

        (?:(?<!\\\\)(?:(?P<name1>pattern1)|(?P<name2>pattern2)|...)
           |(?P<escape>\\\\\\\\))

    i.e. it matches one of the token patterns when it is not preceded by a
    backslash (capturing it in the match group bearing the token's name), or
    an escaped backslash (two backslashes), captured in the ``escape`` group.
    Alternatives are tried in the order the keyword arguments were given.
    """
    all_tokens = "|".join(
        "(?P<%s>%s)" % (name, value) for name, value in tokens.items()
    )
    nonescaped = r"(?<!\\)(?:%s)" % all_tokens

    # The final pattern matches either the above pattern, or an escaped
    # backslash, captured in the "escape" match group.
    return re.compile("(?:%s|%s)" % (nonescaped, r"(?P<escape>\\\\)"))


# Tokens that are significant outside of any quotes.
UNQUOTED_TOKENS_RE = _tokens2re(
    whitespace=r"[\t\r\n ]+",
    quote=r'[\'"]',
    comment="#",
    special=r"[<>&|`(){}$;\*\?]",
    backslashed=r"\\[^\\]",
)

# Tokens that are significant inside a double-quoted string.
DOUBLY_QUOTED_TOKENS_RE = _tokens2re(
    quote='"',
    backslashedquote=r'\\"',
    # Raw string: "\$" in a plain string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    special=r"\$",
    backslashed=r'\\[^\\"]',
)

# A backslash immediately followed by a newline (line continuation).
ESCAPED_NEWLINES_RE = re.compile(r"\\\n")

# This regexp contains the same characters as all those listed in
# UNQUOTED_TOKENS_RE. Please keep in sync.
SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]")


class MetaCharacterException(Exception):
    """Raised when a command line contains an unquoted, non-escaped shell
    metacharacter, meaning it cannot be split without an actual shell.

    The offending character is available as the ``char`` attribute.
    """

    def __init__(self, char):
        self.char = char


class _ClineSplitter(object):
    """
    Parses a given command line string and creates a list of command
    and arguments, available as the ``result`` attribute once the instance
    is constructed.

    No wildcard or variable expansion is performed: unquoted, non-escaped
    shell metacharacters (including ``*`` and ``?``) raise
    MetaCharacterException instead.
    """

    def __init__(self, cline):
        # Argument currently being accumulated; None means "no argument in
        # progress", which is distinct from an empty-string argument.
        self.arg = None
        # Remainder of the command line still to be parsed.
        self.cline = cline
        # Finalized arguments, in order.
        self.result = []
        self._parse_unquoted()

    def _push(self, text):
        """
        Push the given string as part of the current argument
        """
        if self.arg is None:
            self.arg = ""
        self.arg += text

    def _next(self):
        """
        Finalize current argument, effectively adding it to the list.
        """
        if self.arg is None:
            return
        self.result.append(self.arg)
        self.arg = None

    def _parse_unquoted(self):
        """
        Parse command line remainder in the context of an unquoted string.

        Raises MetaCharacterException on an unquoted special character, and
        Exception on an unterminated quoted string.
        """
        while self.cline:
            # Find the next token
            m = UNQUOTED_TOKENS_RE.search(self.cline)
            # If we find none, the remainder of the string can be pushed to
            # the current argument and the argument finalized
            if not m:
                self._push(self.cline)
                break
            # The beginning of the string, up to the found token, is part of
            # the current argument
            if m.start():
                self._push(self.cline[: m.start()])
            self.cline = self.cline[m.end() :]

            # Keep only the group(s) that actually matched.
            match = {name: value for name, value in m.groupdict().items() if value}
            if "quote" in match:
                # " or ' start a quoted string
                if match["quote"] == '"':
                    self._parse_doubly_quoted()
                else:
                    self._parse_quoted()
            elif "comment" in match:
                # Comments are ignored. The current argument can be finalized,
                # and parsing stopped.
                break
            elif "special" in match:
                # Unquoted, non-escaped special characters need to be sent to a
                # shell.
                raise MetaCharacterException(match["special"])
            elif "whitespace" in match:
                # Whitespaces terminate current argument.
                self._next()
            elif "escape" in match:
                # Escaped backslashes turn into a single backslash
                self._push("\\")
            elif "backslashed" in match:
                # Backslashed characters are unbackslashed
                # e.g. echo \a -> a
                self._push(match["backslashed"][1])
            else:
                raise Exception("Shouldn't reach here")
        # NOTE: this is a truthiness test, so a pending *empty* argument
        # (e.g. from a trailing '') is not emitted here, whereas an empty
        # argument followed by whitespace is emitted by _next() above.
        if self.arg:
            self._next()

    def _parse_quoted(self):
        """
        Parse the remainder as the inside of a single-quoted string.

        Raises Exception if the closing quote is missing.
        """
        # Single quoted strings are preserved, except for the final quote
        index = self.cline.find("'")
        if index == -1:
            raise Exception("Unterminated quoted string in command")
        self._push(self.cline[:index])
        self.cline = self.cline[index + 1 :]

    def _parse_doubly_quoted(self):
        """
        Parse the remainder as the inside of a double-quoted string.

        Returns once the closing double quote has been consumed. Raises
        MetaCharacterException on a non-escaped ``$``, and Exception if the
        closing quote is missing.
        """
        while self.cline:
            m = DOUBLY_QUOTED_TOKENS_RE.search(self.cline)
            if not m:
                raise Exception("Unterminated quoted string in command")
            self._push(self.cline[: m.start()])
            self.cline = self.cline[m.end() :]
            match = {name: value for name, value in m.groupdict().items() if value}
            if "quote" in match:
                # a double quote ends the quoted string, so go back to
                # unquoted parsing
                return
            elif "special" in match:
                # Unquoted, non-escaped special characters in a doubly quoted
                # string still have a special meaning and need to be sent to a
                # shell.
                raise MetaCharacterException(match["special"])
            elif "escape" in match:
                # Escaped backslashes turn into a single backslash
                self._push("\\")
            elif "backslashedquote" in match:
                # Backslashed double quotes are un-backslashed
                self._push('"')
            elif "backslashed" in match:
                # Backslashed characters are kept backslashed
                self._push(match["backslashed"])
        # The only successful exit is the `return` on the closing quote.
        # Getting here means the input ran out first: either nothing followed
        # the opening quote, or the last token consumed (e.g. \" or \x) was
        # not the closing quote.
        raise Exception("Unterminated quoted string in command")


def split(cline):
    """
    Split the given command line string into a list of arguments.

    Backslash-newline sequences are removed before parsing. Raises
    MetaCharacterException if the command line contains an unquoted,
    non-escaped shell metacharacter, and Exception if a quoted string is
    not terminated.
    """
    s = ESCAPED_NEWLINES_RE.sub("", cline)
    return _ClineSplitter(s).result


def _quote(s):
    """Given a string, returns a version that can be used literally on a shell
    command line, enclosing it with single quotes if necessary.

    As a special case, if given an int, returns a string containing the int,
    not enclosed in quotes.
    """
    # Exact type check: bool and other int subclasses do not take this path.
    if type(s) is int:
        return "%d" % s

    # Empty strings need to be quoted to have any significance, and a leading
    # "~" is quoted as well even though it is not in SHELL_QUOTE_RE.
    if s and not SHELL_QUOTE_RE.search(s) and not s.startswith("~"):
        return s

    # Single quoted strings can contain any characters unescaped except the
    # single quote itself, which can't even be escaped, so the string needs to
    # be closed, an escaped single quote added, and reopened.
    t = type(s)
    return t("'%s'") % s.replace(t("'"), t("'\\''"))


def quote(*strings):
    """Given one or more strings, returns a quoted string that can be used
    literally on a shell command line.

    >>> quote('a', 'b')
    'a b'
    >>> quote('a b', 'c')
    "'a b' c"
    """
    return " ".join(_quote(s) for s in strings)


__all__ = ["MetaCharacterException", "split", "quote"]