1 files changed, 1432 insertions, 0 deletions
diff --git a/python/mozbuild/mozbuild/frontend/reader.py b/python/mozbuild/mozbuild/frontend/reader.py
new file mode 100644
index 0000000000..9d624b37ec
--- /dev/null
+++ b/python/mozbuild/mozbuild/frontend/reader.py
@@ -0,0 +1,1432 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file contains code for reading metadata from the build system into
+# data structures.
+
+r"""Read build frontend files into data structures.
+
+In terms of code architecture, the main interface is BuildReader. BuildReader
+starts with a root mozbuild file. It creates a new execution environment for
+this file, which is represented by the Sandbox class. The Sandbox class is used
+to fill a Context, representing the output of an individual mozbuild file. The
+
+The BuildReader contains basic logic for traversing a tree of mozbuild files.
+It does this by examining specific variables populated during execution.
+"""
+
+import ast
+import inspect
+import logging
+import os
+import sys
+import textwrap
+import time
+import traceback
+import types
+from collections import OrderedDict, defaultdict
+from concurrent.futures.process import ProcessPoolExecutor
+from io import StringIO
+from itertools import chain
+from multiprocessing import cpu_count
+
+import mozpack.path as mozpath
+import six
+from mozpack.files import FileFinder
+from six import string_types
+
+from mozbuild.backend.configenvironment import ConfigEnvironment
+from mozbuild.base import ExecutionSummary
+from mozbuild.util import (
+    EmptyValue,
+    HierarchicalStringList,
+    ReadOnlyDefaultDict,
+    memoize,
+)
+
+from .context import (
+    DEPRECATION_HINTS,
+    FUNCTIONS,
+    SPECIAL_VARIABLES,
+    SUBCONTEXTS,
+    VARIABLES,
+    Context,
+    ContextDerivedValue,
+    Files,
+    SourcePath,
+    SubContext,
+    TemplateContext,
+)
+from .sandbox import (
+    Sandbox,
+    SandboxError,
+    SandboxExecutionError,
+    SandboxLoadError,
+    default_finder,
+)
+
+if six.PY2:
+    type_type = types.TypeType
+else:
+    type_type = type
+
+
+def log(logger, level, action, params, formatter):
+    logger.log(level, formatter, extra={"action": action, "params": params})
+
+
+class EmptyConfig(object):
+    """A config object that is empty.
+
+    This config object is suitable for using with a BuildReader on a vanilla
+    checkout, without any existing configuration. The config is simply
+    bootstrapped from a top source directory path.
+    """
+
+    class PopulateOnGetDict(ReadOnlyDefaultDict):
+        """A variation on ReadOnlyDefaultDict that populates during .get().
+
+        This variation is needed because CONFIG uses .get() to access members.
+        Without it, None (instead of our EmptyValue types) would be returned.
+        """
+
+        def get(self, key, default=None):
+            return self[key]
+
+    default_substs = {
+        # These 2 variables are used semi-frequently and it isn't worth
+        # changing all the instances.
+        "MOZ_APP_NAME": "empty",
+        "MOZ_CHILD_PROCESS_NAME": "empty",
+        # Needed to prevent js/src's config.status from loading.
+        "JS_STANDALONE": "1",
+    }
+
+    def __init__(self, topsrcdir, substs=None):
+        self.topsrcdir = topsrcdir
+        self.topobjdir = ""
+
+        self.substs = self.PopulateOnGetDict(EmptyValue, substs or self.default_substs)
+        self.defines = self.substs
+        self.error_is_fatal = False
+
+
+def is_read_allowed(path, config):
+    """Whether we are allowed to load a mozbuild file at the specified path.
+
+    This is used as cheap security to ensure the build is isolated to known
+    source directories.
+
+    We are allowed to read from the main source directory and any defined
+    external source directories. The latter is to allow 3rd party applications
+    to hook into our build system.
+    """
+    assert os.path.isabs(path)
+    assert os.path.isabs(config.topsrcdir)
+
+    path = mozpath.normpath(path)
+    topsrcdir = mozpath.normpath(config.topsrcdir)
+
+    if mozpath.basedir(path, [topsrcdir]):
+        return True
+
+    return False
+
+
+class SandboxCalledError(SandboxError):
+    """Represents an error resulting from calling the error() function."""
+
+    def __init__(self, file_stack, message):
+        SandboxError.__init__(self, file_stack)
+        self.message = message
+
+
+class MozbuildSandbox(Sandbox):
+    """Implementation of a Sandbox tailored for mozbuild files.
+
+    We expose a few useful functions and expose the set of variables defining
+    Mozilla's build system.
+
+    context is a Context instance.
+
+    metadata is a dict of metadata that can be used during the sandbox
+    evaluation.
+    """
+
+    def __init__(self, context, metadata={}, finder=default_finder):
+        assert isinstance(context, Context)
+
+        Sandbox.__init__(self, context, finder=finder)
+
+        self._log = logging.getLogger(__name__)
+
+        self.metadata = dict(metadata)
+        exports = self.metadata.get("exports", {})
+        self.exports = set(exports.keys())
+        context.update(exports)
+        self.templates = self.metadata.setdefault("templates", {})
+        self.special_variables = self.metadata.setdefault(
+            "special_variables", SPECIAL_VARIABLES
+        )
+        self.functions = self.metadata.setdefault("functions", FUNCTIONS)
+        self.subcontext_types = self.metadata.setdefault("subcontexts", SUBCONTEXTS)
+
+    def __getitem__(self, key):
+        if key in self.special_variables:
+            return self.special_variables[key][0](self._context)
+        if key in self.functions:
+            return self._create_function(self.functions[key])
+        if key in self.subcontext_types:
+            return self._create_subcontext(self.subcontext_types[key])
+        if key in self.templates:
+            return self._create_template_wrapper(self.templates[key])
+        return Sandbox.__getitem__(self, key)
+
+    def __contains__(self, key):
+        if any(
+            key in d
+            for d in (
+                self.special_variables,
+                self.functions,
+                self.subcontext_types,
+                self.templates,
+            )
+        ):
+            return True
+
+        return Sandbox.__contains__(self, key)
+
+    def __setitem__(self, key, value):
+        if key in self.special_variables and value is self[key]:
+            return
+        if (
+            key in self.special_variables
+            or key in self.functions
+            or key in self.subcontext_types
+        ):
+            raise KeyError('Cannot set "%s" because it is a reserved keyword' % key)
+        if key in self.exports:
+            self._context[key] = value
+            self.exports.remove(key)
+            return
+        Sandbox.__setitem__(self, key, value)
+
+    def exec_file(self, path):
+        """Override exec_file to normalize paths and restrict file loading.
+
+        Paths will be rejected if they do not fall under topsrcdir or one of
+        the external roots.
+        """
+
+        # realpath() is needed for true security. But, this isn't for security
+        # protection, so it is omitted.
+        if not is_read_allowed(path, self._context.config):
+            raise SandboxLoadError(
+                self._context.source_stack, sys.exc_info()[2], illegal_path=path
+            )
+
+        Sandbox.exec_file(self, path)
+
+    def _export(self, varname):
+        """Export the variable to all subdirectories of the current path."""
+
+        exports = self.metadata.setdefault("exports", dict())
+        if varname in exports:
+            raise Exception("Variable has already been exported: %s" % varname)
+
+        try:
+            # Doing a regular self._context[varname] causes a set as a side
+            # effect. By calling the dict method instead, we don't have any
+            # side effects.
+            exports[varname] = dict.__getitem__(self._context, varname)
+        except KeyError:
+            self.last_name_error = KeyError("global_ns", "get_unknown", varname)
+            raise self.last_name_error
+
+    def recompute_exports(self):
+        """Recompute the variables to export to subdirectories with the current
+        values in the subdirectory."""
+
+        if "exports" in self.metadata:
+            for key in self.metadata["exports"]:
+                self.metadata["exports"][key] = self[key]
+
+    def _include(self, path):
+        """Include and exec another file within the context of this one."""
+
+        # path is a SourcePath
+        self.exec_file(path.full_path)
+
+    def _warning(self, message):
+        # FUTURE consider capturing warnings in a variable instead of printing.
+        print("WARNING: %s" % message, file=sys.stderr)
+
+    def _error(self, message):
+        if self._context.error_is_fatal:
+            raise SandboxCalledError(self._context.source_stack, message)
+        else:
+            self._warning(message)
+
+    def _template_decorator(self, func):
+        """Registers a template function."""
+
+        if not inspect.isfunction(func):
+            raise Exception(
+                "`template` is a function decorator. You must "
+                "use it as `@template` preceding a function declaration."
+            )
+
+        name = func.__name__
+
+        if name in self.templates:
+            raise KeyError(
+                'A template named "%s" was already declared in %s.'
+                % (name, self.templates[name].path)
+            )
+
+        if name.islower() or name.isupper() or name[0].islower():
+            raise NameError("Template function names must be CamelCase.")
+
+        self.templates[name] = TemplateFunction(func, self)
+
+    @memoize
+    def _create_subcontext(self, cls):
+        """Return a function object that creates SubContext instances."""
+
+        def fn(*args, **kwargs):
+            return cls(self._context, *args, **kwargs)
+
+        return fn
+
+    @memoize
+    def _create_function(self, function_def):
+        """Returns a function object for use within the sandbox for the given
+        function definition.
+
+        The wrapper function does type coercion on the function arguments
+        """
+        func, args_def, doc = function_def
+
+        def function(*args):
+            def coerce(arg, type):
+                if not isinstance(arg, type):
+                    if issubclass(type, ContextDerivedValue):
+                        arg = type(self._context, arg)
+                    else:
+                        arg = type(arg)
+                return arg
+
+            args = [coerce(arg, type) for arg, type in zip(args, args_def)]
+            return func(self)(*args)
+
+        return function
+
+    @memoize
+    def _create_template_wrapper(self, template):
+        """Returns a function object for use within the sandbox for the given
+        TemplateFunction instance..
+
+        When a moz.build file contains a reference to a template call, the
+        sandbox needs a function to execute. This is what this method returns.
+        That function creates a new sandbox for execution of the template.
+        After the template is executed, the data from its execution is merged
+        with the context of the calling sandbox.
+        """
+
+        def template_wrapper(*args, **kwargs):
+            context = TemplateContext(
+                template=template.name,
+                allowed_variables=self._context._allowed_variables,
+                config=self._context.config,
+            )
+            context.add_source(self._context.current_path)
+            for p in self._context.all_paths:
+                context.add_source(p)
+
+            sandbox = MozbuildSandbox(
+                context,
+                metadata={
+                    # We should arguably set these defaults to something else.
+                    # Templates, for example, should arguably come from the state
+                    # of the sandbox from when the template was declared, not when
+                    # it was instantiated. Bug 1137319.
+                    "functions": self.metadata.get("functions", {}),
+                    "special_variables": self.metadata.get("special_variables", {}),
+                    "subcontexts": self.metadata.get("subcontexts", {}),
+                    "templates": self.metadata.get("templates", {}),
+                },
+                finder=self._finder,
+            )
+
+            template.exec_in_sandbox(sandbox, *args, **kwargs)
+
+            # This is gross, but allows the merge to happen. Eventually, the
+            # merging will go away and template contexts emitted independently.
+            klass = self._context.__class__
+            self._context.__class__ = TemplateContext
+            # The sandbox will do all the necessary checks for these merges.
+            for key, value in context.items():
+                if isinstance(value, dict):
+                    self[key].update(value)
+                elif isinstance(value, (list, HierarchicalStringList)):
+                    self[key] += value
+                else:
+                    self[key] = value
+            self._context.__class__ = klass
+
+            for p in context.all_paths:
+                self._context.add_source(p)
+
+        return template_wrapper
+
+
+class TemplateFunction(object):
+    def __init__(self, func, sandbox):
+        self.path = func.__code__.co_filename
+        self.name = func.__name__
+
+        code = func.__code__
+        firstlineno = code.co_firstlineno
+        lines = sandbox._current_source.splitlines(True)
+        if lines:
+            # Older versions of python 2.7 had a buggy inspect.getblock() that
+            # would ignore the last line if it didn't terminate with a newline.
+            if not lines[-1].endswith("\n"):
+                lines[-1] += "\n"
+        lines = inspect.getblock(lines[firstlineno - 1 :])
+
+        # The code lines we get out of inspect.getsourcelines look like
+        #   @template
+        #   def Template(*args, **kwargs):
+        #       VAR = 'value'
+        #       ...
+        func_ast = ast.parse("".join(lines), self.path)
+        # Remove decorators
+        func_ast.body[0].decorator_list = []
+        # Adjust line numbers accordingly
+        ast.increment_lineno(func_ast, firstlineno - 1)
+
+        # When using a custom dictionary for function globals/locals, Cpython
+        # actually never calls __getitem__ and __setitem__, so we need to
+        # modify the AST so that accesses to globals are properly directed
+        # to a dict. AST wants binary_type for this in Py2 and text_type for
+        # this in Py3, so cast to str.
+        self._global_name = str("_data")
+        # In case '_data' is a name used for a variable in the function code,
+        # prepend more underscores until we find an unused name.
+        while (
+            self._global_name in code.co_names or self._global_name in code.co_varnames
+        ):
+            self._global_name += str("_")
+        func_ast = self.RewriteName(sandbox, self._global_name).visit(func_ast)
+
+        # Execute the rewritten code. That code now looks like:
+        #   def Template(*args, **kwargs):
+        #       _data['VAR'] = 'value'
+        #       ...
+        # The result of executing this code is the creation of a 'Template'
+        # function object in the global namespace.
+        glob = {"__builtins__": sandbox._builtins}
+        func = types.FunctionType(
+            compile(func_ast, self.path, "exec"),
+            glob,
+            self.name,
+            func.__defaults__,
+            func.__closure__,
+        )
+        func()
+
+        self._func = glob[self.name]
+
+    def exec_in_sandbox(self, sandbox, *args, **kwargs):
+        """Executes the template function in the given sandbox."""
+        # Create a new function object associated with the execution sandbox
+        glob = {self._global_name: sandbox, "__builtins__": sandbox._builtins}
+        func = types.FunctionType(
+            self._func.__code__,
+            glob,
+            self.name,
+            self._func.__defaults__,
+            self._func.__closure__,
+        )
+        sandbox.exec_function(func, args, kwargs, self.path, becomes_current_path=False)
+
+    class RewriteName(ast.NodeTransformer):
+        """AST Node Transformer to rewrite variable accesses to go through
+        a dict.
+        """
+
+        def __init__(self, sandbox, global_name):
+            self._sandbox = sandbox
+            self._global_name = global_name
+
+        def visit_Str(self, node):
+            node.s = six.ensure_text(node.s)
+            return node
+
+        def visit_Name(self, node):
+            # Modify uppercase variable references and names known to the
+            # sandbox as if they were retrieved from a dict instead.
+            if not node.id.isupper() and node.id not in self._sandbox:
+                return node
+
+            def c(new_node):
+                return ast.copy_location(new_node, node)
+
+            return c(
+                ast.Subscript(
+                    value=c(ast.Name(id=self._global_name, ctx=ast.Load())),
+                    slice=c(ast.Index(value=c(ast.Str(s=node.id)))),
+                    ctx=node.ctx,
+                )
+            )
+
+
+class SandboxValidationError(Exception):
+    """Represents an error encountered when validating sandbox results."""
+
+    def __init__(self, message, context):
+        Exception.__init__(self, message)
+        self.context = context
+
+    def __str__(self):
+        s = StringIO()
+
+        delim = "=" * 30
+        s.write("\n%s\nFATAL ERROR PROCESSING MOZBUILD FILE\n%s\n\n" % (delim, delim))
+
+        s.write("The error occurred while processing the following file or ")
+        s.write("one of the files it includes:\n")
+        s.write("\n")
+        s.write("    %s/moz.build\n" % self.context.srcdir)
+        s.write("\n")
+
+        s.write("The error occurred when validating the result of ")
+        s.write("the execution. The reported error is:\n")
+        s.write("\n")
+        s.write(
+            "".join(
+                "    %s\n" % l
+                for l in super(SandboxValidationError, self).__str__().splitlines()
+            )
+        )
+        s.write("\n")
+
+        return s.getvalue()
+
+
+class BuildReaderError(Exception):
+    """Represents errors encountered during BuildReader execution.
+
+    The main purpose of this class is to facilitate user-actionable error
+    messages. Execution errors should say:
+
+      - Why they failed
+      - Where they failed
+      - What can be done to prevent the error
+
+    A lot of the code in this class should arguably be inside sandbox.py.
+    However, extraction is somewhat difficult given the additions
+    MozbuildSandbox has over Sandbox (e.g. the concept of included files -
+    which affect error messages, of course).
+    """
+
+    def __init__(
+        self,
+        file_stack,
+        trace,
+        sandbox_exec_error=None,
+        sandbox_load_error=None,
+        validation_error=None,
+        other_error=None,
+        sandbox_called_error=None,
+    ):
+
+        self.file_stack = file_stack
+        self.trace = trace
+        self.sandbox_called_error = sandbox_called_error
+        self.sandbox_exec = sandbox_exec_error
+        self.sandbox_load = sandbox_load_error
+        self.validation_error = validation_error
+        self.other = other_error
+
+    @property
+    def main_file(self):
+        return self.file_stack[-1]
+
+    @property
+    def actual_file(self):
+        # We report the file that called out to the file that couldn't load.
+        if self.sandbox_load is not None:
+            if len(self.sandbox_load.file_stack) > 1:
+                return self.sandbox_load.file_stack[-2]
+
+            if len(self.file_stack) > 1:
+                return self.file_stack[-2]
+
+        if self.sandbox_error is not None and len(self.sandbox_error.file_stack):
+            return self.sandbox_error.file_stack[-1]
+
+        return self.file_stack[-1]
+
+    @property
+    def sandbox_error(self):
+        return self.sandbox_exec or self.sandbox_load or self.sandbox_called_error
+
+    def __str__(self):
+        s = StringIO()
+
+        delim = "=" * 30
+        s.write("\n%s\nFATAL ERROR PROCESSING MOZBUILD FILE\n%s\n\n" % (delim, delim))
+
+        s.write("The error occurred while processing the following file:\n")
+        s.write("\n")
+        s.write("    %s\n" % self.actual_file)
+        s.write("\n")
+
+        if self.actual_file != self.main_file and not self.sandbox_load:
+            s.write("This file was included as part of processing:\n")
+            s.write("\n")
+            s.write("    %s\n" % self.main_file)
+            s.write("\n")
+
+        if self.sandbox_error is not None:
+            self._print_sandbox_error(s)
+        elif self.validation_error is not None:
+            s.write("The error occurred when validating the result of ")
+            s.write("the execution. The reported error is:\n")
+            s.write("\n")
+            s.write(
+                "".join(
+                    "    %s\n" % l
+                    for l in six.text_type(self.validation_error).splitlines()
+                )
+            )
+            s.write("\n")
+        else:
+            s.write("The error appears to be part of the %s " % __name__)
+            s.write("Python module itself! It is possible you have stumbled ")
+            s.write("across a legitimate bug.\n")
+            s.write("\n")
+
+            for l in traceback.format_exception(
+                type(self.other), self.other, self.trace
+            ):
+                s.write(six.ensure_text(l))
+
+        return s.getvalue()
+
+    def _print_sandbox_error(self, s):
+        # Try to find the frame of the executed code.
+        script_frame = None
+
+        # We don't currently capture the trace for SandboxCalledError.
+        # Therefore, we don't get line numbers from the moz.build file.
+        # FUTURE capture this.
+        trace = getattr(self.sandbox_error, "trace", None)
+        frames = []
+        if trace:
+            frames = traceback.extract_tb(trace)
+        for frame in frames:
+            if frame[0] == self.actual_file:
+                script_frame = frame
+
+            # Reset if we enter a new execution context. This prevents errors
+            # in this module from being attributes to a script.
+            elif frame[0] == __file__ and frame[2] == "exec_function":
+                script_frame = None
+
+        if script_frame is not None:
+            s.write("The error was triggered on line %d " % script_frame[1])
+            s.write("of this file:\n")
+            s.write("\n")
+            s.write("    %s\n" % script_frame[3])
+            s.write("\n")
+
+        if self.sandbox_called_error is not None:
+            self._print_sandbox_called_error(s)
+            return
+
+        if self.sandbox_load is not None:
+            self._print_sandbox_load_error(s)
+            return
+
+        self._print_sandbox_exec_error(s)
+
+    def _print_sandbox_called_error(self, s):
+        assert self.sandbox_called_error is not None
+
+        s.write("A moz.build file called the error() function.\n")
+        s.write("\n")
+        s.write("The error it encountered is:\n")
+        s.write("\n")
+        s.write("    %s\n" % self.sandbox_called_error.message)
+        s.write("\n")
+        s.write("Correct the error condition and try again.\n")
+
+    def _print_sandbox_load_error(self, s):
+        assert self.sandbox_load is not None
+
+        if self.sandbox_load.illegal_path is not None:
+            s.write("The underlying problem is an illegal file access. ")
+            s.write("This is likely due to trying to access a file ")
+            s.write("outside of the top source directory.\n")
+            s.write("\n")
+            s.write("The path whose access was denied is:\n")
+            s.write("\n")
+            s.write("    %s\n" % self.sandbox_load.illegal_path)
+            s.write("\n")
+            s.write("Modify the script to not access this file and ")
+            s.write("try again.\n")
+            return
+
+        if self.sandbox_load.read_error is not None:
+            if not os.path.exists(self.sandbox_load.read_error):
+                s.write("The underlying problem is we referenced a path ")
+                s.write("that does not exist. That path is:\n")
+                s.write("\n")
+                s.write("    %s\n" % self.sandbox_load.read_error)
+                s.write("\n")
+                s.write("Either create the file if it needs to exist or ")
+                s.write("do not reference it.\n")
+            else:
+                s.write("The underlying problem is a referenced path could ")
+                s.write("not be read. The trouble path is:\n")
+                s.write("\n")
+                s.write("    %s\n" % self.sandbox_load.read_error)
+                s.write("\n")
+                s.write("It is possible the path is not correct. Is it ")
+                s.write("pointing to a directory? It could also be a file ")
+                s.write("permissions issue. Ensure that the file is ")
+                s.write("readable.\n")
+
+            return
+
+        # This module is buggy if you see this.
+        raise AssertionError("SandboxLoadError with unhandled properties!")
+
+    def _print_sandbox_exec_error(self, s):
+        assert self.sandbox_exec is not None
+
+        inner = self.sandbox_exec.exc_value
+
+        if isinstance(inner, SyntaxError):
+            s.write("The underlying problem is a Python syntax error ")
+            s.write("on line %d:\n" % inner.lineno)
+            s.write("\n")
+            s.write("    %s\n" % inner.text)
+            if inner.offset:
+                s.write((" " * (inner.offset + 4)) + "^\n")
+            s.write("\n")
+            s.write("Fix the syntax error and try again.\n")
+            return
+
+        if isinstance(inner, KeyError):
+            self._print_keyerror(inner, s)
+        elif isinstance(inner, ValueError):
+            self._print_valueerror(inner, s)
+        else:
+            self._print_exception(inner, s)
+
+    def _print_keyerror(self, inner, s):
+        if not inner.args or inner.args[0] not in ("global_ns", "local_ns"):
+            self._print_exception(inner, s)
+            return
+
+        if inner.args[0] == "global_ns":
+            import difflib
+
+            verb = None
+            if inner.args[1] == "get_unknown":
+                verb = "read"
+            elif inner.args[1] == "set_unknown":
+                verb = "write"
+            elif inner.args[1] == "reassign":
+                s.write("The underlying problem is an attempt to reassign ")
+                s.write("a reserved UPPERCASE variable.\n")
+                s.write("\n")
+                s.write("The reassigned variable causing the error is:\n")
+                s.write("\n")
+                s.write("    %s\n" % inner.args[2])
+                s.write("\n")
+                s.write('Maybe you meant "+=" instead of "="?\n')
+                return
+            else:
+                raise AssertionError("Unhandled global_ns: %s" % inner.args[1])
+
+            s.write("The underlying problem is an attempt to %s " % verb)
+            s.write("a reserved UPPERCASE variable that does not exist.\n")
+            s.write("\n")
+            s.write("The variable %s causing the error is:\n" % verb)
+            s.write("\n")
+            s.write("    %s\n" % inner.args[2])
+            s.write("\n")
+            close_matches = difflib.get_close_matches(
+                inner.args[2], VARIABLES.keys(), 2
+            )
+            if close_matches:
+                s.write("Maybe you meant %s?\n" % " or ".join(close_matches))
+                s.write("\n")
+
+            if inner.args[2] in DEPRECATION_HINTS:
+                s.write(
+                    "%s\n" % textwrap.dedent(DEPRECATION_HINTS[inner.args[2]]).strip()
+                )
+                return
+
+            s.write("Please change the file to not use this variable.\n")
+            s.write("\n")
+            s.write("For reference, the set of valid variables is:\n")
+            s.write("\n")
+            s.write(", ".join(sorted(VARIABLES.keys())) + "\n")
+            return
+
+        s.write("The underlying problem is a reference to an undefined ")
+        s.write("local variable:\n")
+        s.write("\n")
+        s.write("    %s\n" % inner.args[2])
+        s.write("\n")
+        s.write("Please change the file to not reference undefined ")
+        s.write("variables and try again.\n")
+
+    def _print_valueerror(self, inner, s):
+        if not inner.args or inner.args[0] not in ("global_ns", "local_ns"):
+            self._print_exception(inner, s)
+            return
+
+        assert inner.args[1] == "set_type"
+
+        s.write("The underlying problem is an attempt to write an illegal ")
+        s.write("value to a special variable.\n")
+        s.write("\n")
+        s.write("The variable whose value was rejected is:\n")
+        s.write("\n")
+        s.write("    %s" % inner.args[2])
+        s.write("\n")
+        s.write("The value being written to it was of the following type:\n")
+        s.write("\n")
+        s.write("    %s\n" % type(inner.args[3]).__name__)
+        s.write("\n")
+        s.write("This variable expects the following type(s):\n")
+        s.write("\n")
+        if type(inner.args[4]) == type_type:
+            s.write("    %s\n" % inner.args[4].__name__)
+        else:
+            for t in inner.args[4]:
+                s.write("    %s\n" % t.__name__)
+        s.write("\n")
+        s.write("Change the file to write a value of the appropriate type ")
+        s.write("and try again.\n")
+
+    def _print_exception(self, e, s):
+        s.write("An error was encountered as part of executing the file ")
+        s.write("itself. The error appears to be the fault of the script.\n")
+        s.write("\n")
+        s.write("The error as reported by Python is:\n")
+        s.write("\n")
+        s.write("    %s\n" % traceback.format_exception_only(type(e), e))
+
+
+class BuildReader(object):
+    """Read a tree of mozbuild files into data structures.
+
+    This is where the build system starts. You give it a tree configuration
+    (the output of configuration) and it executes the moz.build files and
+    collects the data they define.
+
+    The reader can optionally call a callable after each sandbox is evaluated
+    but before its evaluated content is processed. This gives callers the
+    opportunity to modify contexts before side-effects occur from their
+    content. This callback receives the ``Context`` containing the result of
+    each sandbox evaluation. Its return value is ignored.
+    """
+
+    def __init__(self, config, finder=default_finder):
+        self.config = config
+
+        self._log = logging.getLogger(__name__)
+        self._read_files = set()
+        self._execution_stack = []
+        self.finder = finder
+
+        # Finder patterns to ignore when searching for moz.build files.
+        ignores = {
+            # Ignore fake moz.build files used for testing moz.build.
+            "python/mozbuild/mozbuild/test",
+            "testing/mozbase/moztest/tests/data",
+            # Ignore object directories.
+            "obj*",
+        }
+
+        self._relevant_mozbuild_finder = FileFinder(
+            self.config.topsrcdir, ignore=ignores
+        )
+
+        # Also ignore any other directories that could be objdirs, they don't
+        # necessarily start with the string 'obj'.
+        for path, f in self._relevant_mozbuild_finder.find("*/config.status"):
+            self._relevant_mozbuild_finder.ignore.add(os.path.dirname(path))
+
+        max_workers = cpu_count()
+        if sys.platform.startswith("win"):
+            # In python 3, on Windows, ProcessPoolExecutor uses
+            # _winapi.WaitForMultipleObjects, which doesn't work on large
+            # number of objects. It also has some automatic capping to avoid
+            # _winapi.WaitForMultipleObjects being unhappy as a consequence,
+            # but that capping is actually insufficient in python 3.7 and 3.8
+            # (as well as inexistent in older versions). So we cap ourselves
+            # to 60, see https://bugs.python.org/issue26903#msg365886.
+            max_workers = min(max_workers, 60)
+        self._gyp_worker_pool = ProcessPoolExecutor(max_workers=max_workers)
+        self._gyp_processors = []
+        self._execution_time = 0.0
+        self._file_count = 0
+        self._gyp_execution_time = 0.0
+        self._gyp_file_count = 0
+
+    def summary(self):
+        return ExecutionSummary(
+            "Finished reading {file_count:d} moz.build files in "
+            "{execution_time:.2f}s",
+            file_count=self._file_count,
+            execution_time=self._execution_time,
+        )
+
+    def gyp_summary(self):
+        return ExecutionSummary(
+            "Read {file_count:d} gyp files in parallel contributing "
+            "{execution_time:.2f}s to total wall time",
+            file_count=self._gyp_file_count,
+            execution_time=self._gyp_execution_time,
+        )
+
+    def read_topsrcdir(self):
+        """Read the tree of linked moz.build files.
+
+        This starts with the tree's top-most moz.build file and descends into
+        all linked moz.build files until all relevant files have been evaluated.
+
+        This is a generator of Context instances. As each moz.build file is
+        read, a new Context is created and emitted.
+        """
+        path = mozpath.join(self.config.topsrcdir, "moz.build")
+        for r in self.read_mozbuild(path, self.config):
+            yield r
+        all_gyp_paths = set()
+        for g in self._gyp_processors:
+            for gyp_context in g.results:
+                all_gyp_paths |= gyp_context.all_paths
+                yield gyp_context
+            self._gyp_execution_time += g.execution_time
+        self._gyp_file_count += len(all_gyp_paths)
+        self._gyp_worker_pool.shutdown()
+
+    def all_mozbuild_paths(self):
+        """Iterator over all available moz.build files.
+
+        This method has little to do with the reader. It should arguably belong
+        elsewhere.
+        """
+        # In the future, we may traverse moz.build files by looking
+        # for DIRS references in the AST, even if a directory is added behind
+        # a conditional. For now, just walk the filesystem.
+        for path, f in self._relevant_mozbuild_finder.find("**/moz.build"):
+            yield path
+
+    def find_variables_from_ast(self, variables, path=None):
+        """Finds all assignments to the specified variables by parsing
+        moz.build abstract syntax trees.
+
+        This function only supports two cases, as detailed below.
+
+        1) A dict. Keys and values should both be strings, e.g:
+
+            VARIABLE['foo'] = 'bar'
+
+        This is an `Assign` node with a `Subscript` target. The `Subscript`'s
+        value is a `Name` node with id "VARIABLE". The slice of this target is
+        an `Index` node and its value is a `Str` with value "foo".
+
+        2) A simple list. Values should be strings, e.g: The target of the
+        assignment should be a Name node. Values should be a List node,
+        whose elements are Str nodes. e.g:
+
+            VARIABLE += ['foo']
+
+        This is an `AugAssign` node with a `Name` target with id "VARIABLE".
+        The value is a `List` node containing one `Str` element whose value is
+        "foo".
+
+        With a little work, this function could support other types of
+        assignment. But if we end up writing a lot of AST code, it might be
+        best to import a high-level AST manipulation library into the tree.
+
+        Args:
+            variables (list): A list of variable assignments to capture.
+            path (str): A path relative to the source dir. If specified, only
+                `moz.build` files relevant to this path will be parsed. Otherwise
+                all `moz.build` files are parsed.
+
+        Returns:
+            A generator that generates tuples of the form `(<moz.build path>,
+            <variable name>, <key>, <value>)`. The `key` will only be
+            defined if the variable is an object, otherwise it is `None`.
+        """
+
+        if isinstance(variables, string_types):
+            variables = [variables]
+
+        def assigned_variable(node):
+            # This is not correct, but we don't care yet.
+            if hasattr(node, "targets"):
+                # Nothing in moz.build does multi-assignment (yet). So error if
+                # we see it.
+                assert len(node.targets) == 1
+
+                target = node.targets[0]
+            else:
+                target = node.target
+
+            if isinstance(target, ast.Subscript):
+                if not isinstance(target.value, ast.Name):
+                    return None, None
+                name = target.value.id
+            elif isinstance(target, ast.Name):
+                name = target.id
+            else:
+                return None, None
+
+            if name not in variables:
+                return None, None
+
+            key = None
+            if isinstance(target, ast.Subscript):
+                # We need to branch to deal with python version differences.
+                if isinstance(target.slice, ast.Constant):
+                    # Python >= 3.9
+                    assert isinstance(target.slice.value, str)
+                    key = target.slice.value
+                else:
+                    # Others
+                    assert isinstance(target.slice, ast.Index)
+                    assert isinstance(target.slice.value, ast.Str)
+                    key = target.slice.value.s
+
+            return name, key
+
+        def assigned_values(node):
+            value = node.value
+            if isinstance(value, ast.List):
+                for v in value.elts:
+                    assert isinstance(v, ast.Str)
+                    yield v.s
+            else:
+                assert isinstance(value, ast.Str)
+                yield value.s
+
+        assignments = []
+
+        class Visitor(ast.NodeVisitor):
+            def helper(self, node):
+                name, key = assigned_variable(node)
+                if not name:
+                    return
+
+                for v in assigned_values(node):
+                    assignments.append((name, key, v))
+
+            def visit_Assign(self, node):
+                self.helper(node)
+
+            def visit_AugAssign(self, node):
+                self.helper(node)
+
+        if path:
+            mozbuild_paths = chain(*self._find_relevant_mozbuilds([path]).values())
+        else:
+            mozbuild_paths = self.all_mozbuild_paths()
+
+        for p in mozbuild_paths:
+            assignments[:] = []
+            full = os.path.join(self.config.topsrcdir, p)
+
+            with open(full, "rb") as fh:
+                source = fh.read()
+
+            tree = ast.parse(source, full)
+            Visitor().visit(tree)
+
+            for name, key, value in assignments:
+                yield p, name, key, value
+
+    def read_mozbuild(self, path, config, descend=True, metadata={}):
+        """Read and process a mozbuild file, descending into children.
+
+        This starts with a single mozbuild file, executes it, and descends into
+        other referenced files per our traversal logic.
+
+        The traversal logic is to iterate over the ``*DIRS`` variables, treating
+        each element as a relative directory path. For each encountered
+        directory, we will open the moz.build file located in that
+        directory in a new Sandbox and process it.
+
+        If descend is True (the default), we will descend into child
+        directories and files per variable values.
+
+        Arbitrary metadata in the form of a dict can be passed into this
+        function. This feature is intended to facilitate the build reader
+        injecting state and annotations into moz.build files that is
+        independent of the sandbox's execution context.
+
+        Traversal is performed depth first (for no particular reason).
+        """
+        self._execution_stack.append(path)
+        try:
+            for s in self._read_mozbuild(
+                path, config, descend=descend, metadata=metadata
+            ):
+                yield s
+
+        except BuildReaderError as bre:
+            raise bre
+
+        except SandboxCalledError as sce:
+            raise BuildReaderError(
+                list(self._execution_stack), sys.exc_info()[2], sandbox_called_error=sce
+            )
+
+        except SandboxExecutionError as se:
+            raise BuildReaderError(
+                list(self._execution_stack), sys.exc_info()[2], sandbox_exec_error=se
+            )
+
+        except SandboxLoadError as sle:
+            raise BuildReaderError(
+                list(self._execution_stack), sys.exc_info()[2], sandbox_load_error=sle
+            )
+
+        except SandboxValidationError as ve:
+            raise BuildReaderError(
+                list(self._execution_stack), sys.exc_info()[2], validation_error=ve
+            )
+
+        except Exception as e:
+            raise BuildReaderError(
+                list(self._execution_stack), sys.exc_info()[2], other_error=e
+            )
+
+    def _read_mozbuild(self, path, config, descend, metadata):
+        path = mozpath.normpath(path)
+        log(
+            self._log,
+            logging.DEBUG,
+            "read_mozbuild",
+            {"path": path},
+            "Reading file: {path}".format(path=path),
+        )
+
+        if path in self._read_files:
+            log(
+                self._log,
+                logging.WARNING,
+                "read_already",
+                {"path": path},
+                "File already read. Skipping: {path}".format(path=path),
+            )
+            return
+
+        self._read_files.add(path)
+
+        time_start = time.monotonic()
+
+        topobjdir = config.topobjdir
+
+        relpath = mozpath.relpath(path, config.topsrcdir)
+        reldir = mozpath.dirname(relpath)
+
+        if mozpath.dirname(relpath) == "js/src" and not config.substs.get(
+            "JS_STANDALONE"
+        ):
+            config = ConfigEnvironment.from_config_status(
+                mozpath.join(topobjdir, reldir, "config.status")
+            )
+            config.topobjdir = topobjdir
+
+        context = Context(VARIABLES, config, self.finder)
+        sandbox = MozbuildSandbox(context, metadata=metadata, finder=self.finder)
+        sandbox.exec_file(path)
+        self._execution_time += time.monotonic() - time_start
+        self._file_count += len(context.all_paths)
+
+        # Yield main context before doing any processing. This gives immediate
+        # consumers an opportunity to change state before our remaining
+        # processing is performed.
+        yield context
+
+        # We need the list of directories pre-gyp processing for later.
+        dirs = list(context.get("DIRS", []))
+
+        curdir = mozpath.dirname(path)
+
+        for target_dir in context.get("GYP_DIRS", []):
+            gyp_dir = context["GYP_DIRS"][target_dir]
+            for v in ("input", "variables"):
+                if not getattr(gyp_dir, v):
+                    raise SandboxValidationError(
+                        "Missing value for " 'GYP_DIRS["%s"].%s' % (target_dir, v),
+                        context,
+                    )
+
+            # The make backend assumes contexts for sub-directories are
+            # emitted after their parent, so accumulate the gyp contexts.
+            # We could emit the parent context before processing gyp
+            # configuration, but we need to add the gyp objdirs to that context
+            # first.
+            from .gyp_reader import GypProcessor
+
+            non_unified_sources = set()
+            for s in gyp_dir.non_unified_sources:
+                source = SourcePath(context, s)
+                if not self.finder.get(source.full_path):
+                    raise SandboxValidationError("Cannot find %s." % source, context)
+                non_unified_sources.add(source)
+            action_overrides = {}
+            for action, script in six.iteritems(gyp_dir.action_overrides):
+                action_overrides[action] = SourcePath(context, script)
+
+            gyp_processor = GypProcessor(
+                context.config,
+                gyp_dir,
+                mozpath.join(curdir, gyp_dir.input),
+                mozpath.join(context.objdir, target_dir),
+                self._gyp_worker_pool,
+                action_overrides,
+                non_unified_sources,
+            )
+            self._gyp_processors.append(gyp_processor)
+
+        for subcontext in sandbox.subcontexts:
+            yield subcontext
+
+        # Traverse into referenced files.
+
+        # It's very tempting to use a set here. Unfortunately, the recursive
+        # make backend needs order preserved. Once we autogenerate all backend
+        # files, we should be able to convert this to a set.
+        recurse_info = OrderedDict()
+        for d in dirs:
+            if d in recurse_info:
+                raise SandboxValidationError(
+                    "Directory (%s) registered multiple times"
+                    % (mozpath.relpath(d.full_path, context.srcdir)),
+                    context,
+                )
+
+            recurse_info[d] = {}
+            for key in sandbox.metadata:
+                if key == "exports":
+                    sandbox.recompute_exports()
+
+                recurse_info[d][key] = dict(sandbox.metadata[key])
+
+        for path, child_metadata in recurse_info.items():
+            child_path = path.join("moz.build").full_path
+
+            # Ensure we don't break out of the topsrcdir. We don't do realpath
+            # because it isn't necessary. If there are symlinks in the srcdir,
+            # that's not our problem. We're not a hosted application: we don't
+            # need to worry about security too much.
+            if not is_read_allowed(child_path, context.config):
+                raise SandboxValidationError(
+                    "Attempting to process file outside of allowed paths: %s"
+                    % child_path,
+                    context,
+                )
+
+            if not descend:
+                continue
+
+            for res in self.read_mozbuild(
+                child_path, context.config, metadata=child_metadata
+            ):
+                yield res
+
+        self._execution_stack.pop()
+
+    def _find_relevant_mozbuilds(self, paths):
+        """Given a set of filesystem paths, find all relevant moz.build files.
+
+        We assume that a moz.build file in the directory ancestry of a given path
+        is relevant to that path. Let's say we have the following files on disk::
+
+           moz.build
+           foo/moz.build
+           foo/baz/moz.build
+           foo/baz/file1
+           other/moz.build
+           other/file2
+
+        If ``foo/baz/file1`` is passed in, the relevant moz.build files are
+        ``moz.build``, ``foo/moz.build``, and ``foo/baz/moz.build``. For
+        ``other/file2``, the relevant moz.build files are ``moz.build`` and
+        ``other/moz.build``.
+
+        Returns a dict of input paths to a list of relevant moz.build files.
+        The root moz.build file is first and the leaf-most moz.build is last.
+        """
+        root = self.config.topsrcdir
+        result = {}
+
+        @memoize
+        def exists(path):
+            return self._relevant_mozbuild_finder.get(path) is not None
+
+        def itermozbuild(path):
+            subpath = ""
+            yield "moz.build"
+            for part in mozpath.split(path):
+                subpath = mozpath.join(subpath, part)
+                yield mozpath.join(subpath, "moz.build")
+
+        for path in sorted(paths):
+            path = mozpath.normpath(path)
+            if os.path.isabs(path):
+                if not mozpath.basedir(path, [root]):
+                    raise Exception("Path outside topsrcdir: %s" % path)
+                path = mozpath.relpath(path, root)
+
+            result[path] = [p for p in itermozbuild(path) if exists(p)]
+
+        return result
+
+    def read_relevant_mozbuilds(self, paths):
+        """Read and process moz.build files relevant for a set of paths.
+
+        For an iterable of relative-to-root filesystem paths ``paths``,
+        find all moz.build files that may apply to them based on filesystem
+        hierarchy and read those moz.build files.
+
+        The return value is a 2-tuple. The first item is a dict mapping each
+        input filesystem path to a list of Context instances that are relevant
+        to that path. The second item is a list of all Context instances. Each
+        Context instance is in both data structures.
+        """
+        relevants = self._find_relevant_mozbuilds(paths)
+
+        topsrcdir = self.config.topsrcdir
+
+        # Source moz.build file to directories to traverse.
+        dirs = defaultdict(set)
+        # Relevant path to absolute paths of relevant contexts.
+        path_mozbuilds = {}
+
+        # There is room to improve this code (and the code in
+        # _find_relevant_mozbuilds) to better handle multiple files in the same
+        # directory. Bug 1136966 tracks.
+        for path, mbpaths in relevants.items():
+            path_mozbuilds[path] = [mozpath.join(topsrcdir, p) for p in mbpaths]
+
+            for i, mbpath in enumerate(mbpaths[0:-1]):
+                source_dir = mozpath.dirname(mbpath)
+                target_dir = mozpath.dirname(mbpaths[i + 1])
+
+                d = mozpath.normpath(mozpath.join(topsrcdir, mbpath))
+                dirs[d].add(mozpath.relpath(target_dir, source_dir))
+
+        # Exporting doesn't work reliably in tree traversal mode. Override
+        # the function to no-op.
+        functions = dict(FUNCTIONS)
+
+        def export(sandbox):
+            return lambda varname: None
+
+        functions["export"] = tuple([export] + list(FUNCTIONS["export"][1:]))
+
+        metadata = {
+            "functions": functions,
+        }
+
+        contexts = defaultdict(list)
+        all_contexts = []
+        for context in self.read_mozbuild(
+            mozpath.join(topsrcdir, "moz.build"), self.config, metadata=metadata
+        ):
+            # Explicitly set directory traversal variables to override default
+            # traversal rules.
+            if not isinstance(context, SubContext):
+                for v in ("DIRS", "GYP_DIRS"):
+                    context[v][:] = []
+
+                context["DIRS"] = sorted(dirs[context.main_path])
+
+            contexts[context.main_path].append(context)
+            all_contexts.append(context)
+
+        result = {}
+        for path, paths in path_mozbuilds.items():
+            result[path] = six.moves.reduce(
+                lambda x, y: x + y, (contexts[p] for p in paths), []
+            )
+
+        return result, all_contexts
+
+    def files_info(self, paths):
+        """Obtain aggregate data from Files for a set of files.
+
+        Given a set of input paths, determine which moz.build files may
+        define metadata for them, evaluate those moz.build files, and
+        apply file metadata rules defined within to determine metadata
+        values for each file requested.
+
+        Essentially, for each input path:
+
+        1. Determine the set of moz.build files relevant to that file by
+           looking for moz.build files in ancestor directories.
+        2. Evaluate moz.build files starting with the most distant.
+        3. Iterate over Files sub-contexts.
+        4. If the file pattern matches the file we're seeking info on,
+           apply attribute updates.
+        5. Return the most recent value of attributes.
+        """
+        paths, _ = self.read_relevant_mozbuilds(paths)
+
+        r = {}
+
+        # Only do wildcard matching if the '*' character is present.
+        # Otherwise, mozpath.match will match directories, which we've
+        # arbitrarily chosen to not allow.
+        def path_matches_pattern(relpath, pattern):
+            if pattern == relpath:
+                return True
+
+            return "*" in pattern and mozpath.match(relpath, pattern)
+
+        for path, ctxs in paths.items():
+            # Should be normalized by read_relevant_mozbuilds.
+            assert "\\" not in path
+
+            flags = Files(Context())
+
+            for ctx in ctxs:
+                if not isinstance(ctx, Files):
+                    continue
+
+                # read_relevant_mozbuilds() normalizes paths and ensures that
+                # the contexts have paths in the ancestry of the path. When
+                # iterating over tens of thousands of paths, mozpath.relpath()
+                # can be very expensive. So, given our assumptions about paths,
+                # we implement an optimized version.
+                ctx_rel_dir = ctx.relsrcdir
+                if ctx_rel_dir:
+                    assert path.startswith(ctx_rel_dir)
+                    relpath = path[len(ctx_rel_dir) + 1 :]
+                else:
+                    relpath = path
+
+                if any(path_matches_pattern(relpath, p) for p in ctx.patterns):
+                    flags += ctx
+
+            r[path] = flags
+
+        return r