summaryrefslogtreecommitdiffstats
path: root/python/mozbuild/mozbuild/frontend/sandbox.py
blob: 088e817cb08a6768c78f5e2daac2d92eadcb2770 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

r"""Python sandbox implementation for build files.

This module contains classes for Python sandboxes that execute in a
highly-controlled environment.

The main class is `Sandbox`. This provides an execution environment for Python
code and is used to fill a Context instance for the takeaway information from
the execution.

Code in this module takes a different approach to exception handling compared
to what you'd see elsewhere in Python. Arguments to built-in exceptions like
KeyError are machine parseable. This machine-friendly data is used to present
user-friendly error messages in the case of errors.
"""

import os
import sys
import weakref

import six
from mozpack.files import FileFinder

from mozbuild.util import ReadOnlyDict, exec_

from .context import Context

# Module-level FileFinder constructed with "/" (presumably the base directory
# it resolves paths against -- confirm in mozpack.files). It is the default
# value of the ``finder`` argument of ``Sandbox.__init__``.
default_finder = FileFinder("/")


def alphabetical_sorted(iterable, key=lambda x: x.lower(), reverse=False):
    """Return a new sorted list built from the items of ``iterable``.

    This is the function exposed as ``sorted`` inside the sandbox. Unless the
    caller supplies its own ``key``, items are compared by their lower-cased
    form, which yields a case-insensitive alphabetical ordering.
    """
    ordered = list(iterable)
    ordered.sort(key=key, reverse=reverse)
    return ordered


class SandboxError(Exception):
    """Base class for the sandbox exceptions defined in this module.

    ``file_stack`` is stored unchanged on the instance. Callers in this
    module pass the active context's ``source_stack`` for it.
    """

    def __init__(self, file_stack):
        self.file_stack = file_stack


class SandboxExecutionError(SandboxError):
    """Error raised while code was executing inside a Sandbox.

    This is a plain container exception. Its purpose is to capture state
    (the original exception type, value and traceback, plus the file stack)
    so that something else can report on it.
    """

    def __init__(self, file_stack, exc_type, exc_value, trace):
        super(SandboxExecutionError, self).__init__(file_stack)

        # Details of the exception that interrupted the execution.
        self.exc_type, self.exc_value, self.trace = exc_type, exc_value, trace


class SandboxLoadError(SandboxError):
    """Error raised when a file could not be loaded for execution.

    The failure may have happened while another file was being executed. In
    that case ``file_stack`` is non-empty and the file that triggered the
    load sits on top of the stack.

    ``illegal_path`` and ``read_error`` both default to None and are stored
    as given.
    """

    def __init__(self, file_stack, trace, illegal_path=None, read_error=None):
        super(SandboxLoadError, self).__init__(file_stack)

        self.trace = trace
        self.illegal_path, self.read_error = illegal_path, read_error


class Sandbox(dict):
    """Represents a sandbox for executing Python code.

    This class provides a sandbox for execution of a single mozbuild frontend
    file. The result of that execution is stored in the Context instance
    given as the ``context`` argument.

    Sandbox is effectively a glorified wrapper around compile() + exec(). You
    point it at some Python code and it executes it. The main difference from
    executing Python code like normal is that the executed code is very limited
    in what it can do: the sandbox only exposes a very limited set of Python
    functionality. Only specific types and functions are available. This
    prevents executed code from doing things like import modules, open files,
    etc.

    Sandbox instances act as global namespace for the sandboxed execution
    itself. They shall not be used to access the results of the execution.
    Those results are available in the given Context instance after execution.

    The Sandbox itself is responsible for enforcing rules such as forbidding
    reassignment of variables.

    Implementation note: Sandbox derives from dict because exec() insists that
    what it is given for namespaces is a dict.
    """

    # The default set of builtins.
    BUILTINS = ReadOnlyDict(
        {
            # Only real Python built-ins should go here.
            "None": None,
            "False": False,
            "True": True,
            "sorted": alphabetical_sorted,
            "int": int,
            "set": set,
            "tuple": tuple,
        }
    )

    def __init__(self, context, finder=default_finder):
        """Initialize a Sandbox ready for execution.

        ``context`` must be a Context instance (asserted); it becomes the
        bottom of the stack of active contexts. ``finder`` is the object
        exec_file() uses to read files: it is called as
        ``finder.get(path).read()``.
        """
        self._builtins = self.BUILTINS
        # Bypass our own __setitem__, which refuses to touch "__builtins__".
        dict.__setitem__(self, "__builtins__", self._builtins)

        assert isinstance(self._builtins, ReadOnlyDict)
        assert isinstance(context, Context)

        # Contexts are modeled as a stack because multiple context managers
        # may be active.
        self._active_contexts = [context]

        # Seen sub-contexts. Will be populated with other Context instances
        # that were related to execution of this instance.
        self.subcontexts = []

        # We need to record this because it gets swallowed as part of
        # evaluation.
        self._last_name_error = None

        # Current literal source being executed.
        self._current_source = None

        self._finder = finder

    @property
    def _context(self):
        """The currently active context, i.e. the top of the context stack."""
        return self._active_contexts[-1]

    def exec_file(self, path):
        """Execute code at a path in the sandbox.

        The path must be absolute (this is asserted).

        The file content is read through the finder given at construction
        time. Any exception raised while reading is reported as a
        SandboxLoadError whose ``read_error`` attribute is ``path``.
        """
        assert os.path.isabs(path)

        try:
            source = six.ensure_text(self._finder.get(path).read())
        except Exception:
            raise SandboxLoadError(
                self._context.source_stack, sys.exc_info()[2], read_error=path
            )

        self.exec_source(source, path)

    def exec_source(self, source, path=""):
        """Execute Python code within a string.

        The passed string should contain Python code to be executed. The string
        will be compiled and executed.

        You should almost always go through exec_file() because exec_source()
        does not perform extra path normalization. This can cause relative
        paths to behave weirdly.

        Compilation and execution both happen inside exec_function(), so
        errors are reported the way exec_function() reports them.
        """

        def execute():
            # compile() inherits the __future__ from the module by default. We
            # do want Unicode literals.
            code = compile(source, path, "exec")
            # We use ourself as the global namespace for the execution. There
            # is no need for a separate local namespace as moz.build execution
            # is flat, namespace-wise.
            old_source = self._current_source
            self._current_source = source
            try:
                exec_(code, self)
            finally:
                self._current_source = old_source

        self.exec_function(execute, path=path)

    def exec_function(
        self, func, args=(), kwargs=None, path="", becomes_current_path=True
    ):
        """Execute function with the given arguments in the sandbox.

        ``func`` is called as ``func(*args, **kwargs)``; ``kwargs`` may be
        omitted or None, meaning no keyword arguments.

        When ``path`` is non-empty and ``becomes_current_path`` is true,
        ``path`` is pushed as a source on the active context for the duration
        of the call and popped afterwards.

        While ``func`` runs, the active context's ``_sandbox`` attribute is a
        weak reference to this sandbox and ``_current_source`` is None. Both
        are restored when the call finishes, whether or not it raised.

        Raises:
            SandboxError: re-raised unchanged when ``func`` raises one.
            SandboxExecutionError: wraps a NameError or any other Exception
                raised by ``func``.
        """
        # A shared ``{}`` default would be a single object reused by every
        # call; build a fresh mapping per call instead.
        if kwargs is None:
            kwargs = {}

        if path and becomes_current_path:
            self._context.push_source(path)

        old_sandbox = self._context._sandbox
        self._context._sandbox = weakref.ref(self)

        # We don't have to worry about bytecode generation here because we are
        # too low-level for that. However, we could add bytecode generation via
        # the marshal module if parsing performance were ever an issue.

        old_source = self._current_source
        self._current_source = None
        try:
            func(*args, **kwargs)
        except SandboxError:
            # Already in the form callers expect: let it through untouched.
            raise
        except NameError as e:
            # A NameError is raised when a variable could not be found.
            # The original KeyError has been dropped by the interpreter.
            # However, we should have it cached in our instance!

            # Unless a script is doing something wonky like catching NameError
            # itself (that would be silly), if there is an exception on the
            # global namespace, that's our error.
            actual = e

            if self._last_name_error is not None:
                actual = self._last_name_error
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                type(actual),
                actual,
                sys.exc_info()[2],
            )

        except Exception:
            exc = sys.exc_info()
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                exc[0],
                exc[1],
                exc[2],
            )
        finally:
            self._current_source = old_source
            self._context._sandbox = old_sandbox
            if path and becomes_current_path:
                self._context.pop_source()

    def _error_source_stack(self, path, becomes_current_path):
        """Return the file stack to attach to a SandboxExecutionError."""
        # Need to copy the stack, otherwise we may hold a reference that is
        # mutated by the append below or by pop_source() in exec_function's
        # ``finally`` clause.
        source_stack = list(self._context.source_stack)
        if not becomes_current_path:
            # Add current file to the stack because it wasn't added before
            # sandbox execution.
            source_stack.append(path)
        return source_stack

    def push_subcontext(self, context):
        """Push a SubContext onto the execution stack.

        When called, the active context will be set to the specified context,
        meaning all variable accesses will go through it. We also record this
        SubContext as having been executed as part of this sandbox.
        """
        self._active_contexts.append(context)
        if context not in self.subcontexts:
            self.subcontexts.append(context)

    def pop_subcontext(self, context):
        """Pop a SubContext off the execution stack.

        SubContexts must be pushed and popped in opposite order. This is
        validated as part of the function call to ensure proper consumer API
        use.
        """
        popped = self._active_contexts.pop()
        assert popped == context

    def __getitem__(self, key):
        """Look up ``key`` in the sandbox namespace.

        Upper-case keys are read from the active context; any exception
        raised by that lookup is remembered in ``_last_name_error`` (so
        exec_function() can report it) and then re-raised. All other keys are
        read from the underlying dict.
        """
        if key.isupper():
            try:
                return self._context[key]
            except Exception as e:
                self._last_name_error = e
                raise

        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        """Assign ``value`` to ``key`` in the sandbox namespace.

        Upper-case keys are stored in the active context, all other keys in
        the underlying dict.

        Raises:
            KeyError: when ``key`` is a builtin or ``"__builtins__"``, when an
                upper-case key already set in the context is assigned a
                different object, or when an upper-case key not yet in the
                context is assigned an empty list or dict.
        """
        if key in self._builtins or key == "__builtins__":
            raise KeyError("Cannot reassign builtins")

        if key.isupper():
            # Forbid assigning over a previously set value. Interestingly, when
            # doing FOO += ['bar'], python actually does something like:
            #   foo = namespace.__getitem__('FOO')
            #   foo.__iadd__(['bar'])
            #   namespace.__setitem__('FOO', foo)
            # This means __setitem__ is called with the value that is already
            # in the dict, when doing +=, which is permitted.
            if key in self._context and self._context[key] is not value:
                raise KeyError("global_ns", "reassign", key)

            if (
                key not in self._context
                and isinstance(value, (list, dict))
                and not value
            ):
                raise KeyError("Variable %s assigned an empty value." % key)

            self._context[key] = value
        else:
            dict.__setitem__(self, key, value)

    def get(self, key, default=None):
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError("Not supported")

    def __iter__(self):
        """Unsupported: always raises NotImplementedError."""
        raise NotImplementedError("Not supported")

    def __contains__(self, key):
        """Return whether ``key`` is defined in the sandbox namespace.

        Upper-case keys are checked against the active context, all other
        keys against the underlying dict.
        """
        if key.isupper():
            return key in self._context
        return dict.__contains__(self, key)