diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/python/MarkupSafe/src/markupsafe | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/MarkupSafe/src/markupsafe')
5 files changed, 711 insertions, 0 deletions
diff --git a/third_party/python/MarkupSafe/src/markupsafe/__init__.py b/third_party/python/MarkupSafe/src/markupsafe/__init__.py new file mode 100644 index 0000000000..d331ac3622 --- /dev/null +++ b/third_party/python/MarkupSafe/src/markupsafe/__init__.py @@ -0,0 +1,288 @@ +import functools +import re +import string +import typing as t + +if t.TYPE_CHECKING: + import typing_extensions as te + + class HasHTML(te.Protocol): + def __html__(self) -> str: + pass + + +__version__ = "2.0.1" + +_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)") + + +def _simple_escaping_wrapper(name: str) -> t.Callable[..., "Markup"]: + orig = getattr(str, name) + + @functools.wraps(orig) + def wrapped(self: "Markup", *args: t.Any, **kwargs: t.Any) -> "Markup": + args = _escape_argspec(list(args), enumerate(args), self.escape) # type: ignore + _escape_argspec(kwargs, kwargs.items(), self.escape) + return self.__class__(orig(self, *args, **kwargs)) + + return wrapped + + +class Markup(str): + """A string that is ready to be safely inserted into an HTML or XML + document, either because it was escaped or because it was marked + safe. + + Passing an object to the constructor converts it to text and wraps + it to mark it safe without escaping. To escape the text, use the + :meth:`escape` class method instead. + + >>> Markup("Hello, <em>World</em>!") + Markup('Hello, <em>World</em>!') + >>> Markup(42) + Markup('42') + >>> Markup.escape("Hello, <em>World</em>!") + Markup('Hello &lt;em&gt;World&lt;/em&gt;!') + + This implements the ``__html__()`` interface that some frameworks + use. Passing an object that implements ``__html__()`` will wrap the + output of that method, marking it safe. + + >>> class Foo: + ... def __html__(self): + ... return '<a href="/foo">foo</a>' + ... + >>> Markup(Foo()) + Markup('<a href="/foo">foo</a>') + + This is a subclass of :class:`str`. It has the same methods, but + escapes their arguments and returns a ``Markup`` instance. 
+ + >>> Markup("<em>%s</em>") % ("foo & bar",) + Markup('<em>foo &amp; bar</em>') + >>> Markup("<em>Hello</em> ") + "<foo>" + Markup('<em>Hello</em> &lt;foo&gt;') + """ + + __slots__ = () + + def __new__( + cls, base: t.Any = "", encoding: t.Optional[str] = None, errors: str = "strict" + ) -> "Markup": + if hasattr(base, "__html__"): + base = base.__html__() + + if encoding is None: + return super().__new__(cls, base) + + return super().__new__(cls, base, encoding, errors) + + def __html__(self) -> "Markup": + return self + + def __add__(self, other: t.Union[str, "HasHTML"]) -> "Markup": + if isinstance(other, str) or hasattr(other, "__html__"): + return self.__class__(super().__add__(self.escape(other))) + + return NotImplemented + + def __radd__(self, other: t.Union[str, "HasHTML"]) -> "Markup": + if isinstance(other, str) or hasattr(other, "__html__"): + return self.escape(other).__add__(self) + + return NotImplemented + + def __mul__(self, num: int) -> "Markup": + if isinstance(num, int): + return self.__class__(super().__mul__(num)) + + return NotImplemented # type: ignore + + __rmul__ = __mul__ + + def __mod__(self, arg: t.Any) -> "Markup": + if isinstance(arg, tuple): + arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg) + else: + arg = _MarkupEscapeHelper(arg, self.escape) + + return self.__class__(super().__mod__(arg)) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({super().__repr__()})" + + def join(self, seq: t.Iterable[t.Union[str, "HasHTML"]]) -> "Markup": + return self.__class__(super().join(map(self.escape, seq))) + + join.__doc__ = str.join.__doc__ + + def split( # type: ignore + self, sep: t.Optional[str] = None, maxsplit: int = -1 + ) -> t.List["Markup"]: + return [self.__class__(v) for v in super().split(sep, maxsplit)] + + split.__doc__ = str.split.__doc__ + + def rsplit( # type: ignore + self, sep: t.Optional[str] = None, maxsplit: int = -1 + ) -> t.List["Markup"]: + return [self.__class__(v) for v in super().rsplit(sep, 
maxsplit)] + + rsplit.__doc__ = str.rsplit.__doc__ + + def splitlines(self, keepends: bool = False) -> t.List["Markup"]: # type: ignore + return [self.__class__(v) for v in super().splitlines(keepends)] + + splitlines.__doc__ = str.splitlines.__doc__ + + def unescape(self) -> str: + """Convert escaped markup back into a text string. This replaces + HTML entities with the characters they represent. + + >>> Markup("Main &raquo; <em>About</em>").unescape() + 'Main » <em>About</em>' + """ + from html import unescape + + return unescape(str(self)) + + def striptags(self) -> str: + """:meth:`unescape` the markup, remove tags, and normalize + whitespace to single spaces. + + >>> Markup("Main &raquo;\t<em>About</em>").striptags() + 'Main » About' + """ + stripped = " ".join(_striptags_re.sub("", self).split()) + return Markup(stripped).unescape() + + @classmethod + def escape(cls, s: t.Any) -> "Markup": + """Escape a string. Calls :func:`escape` and ensures that for + subclasses the correct type is returned. 
+ """ + rv = escape(s) + + if rv.__class__ is not cls: + return cls(rv) + + return rv + + for method in ( + "__getitem__", + "capitalize", + "title", + "lower", + "upper", + "replace", + "ljust", + "rjust", + "lstrip", + "rstrip", + "center", + "strip", + "translate", + "expandtabs", + "swapcase", + "zfill", + ): + locals()[method] = _simple_escaping_wrapper(method) + + del method + + def partition(self, sep: str) -> t.Tuple["Markup", "Markup", "Markup"]: + l, s, r = super().partition(self.escape(sep)) + cls = self.__class__ + return cls(l), cls(s), cls(r) + + def rpartition(self, sep: str) -> t.Tuple["Markup", "Markup", "Markup"]: + l, s, r = super().rpartition(self.escape(sep)) + cls = self.__class__ + return cls(l), cls(s), cls(r) + + def format(self, *args: t.Any, **kwargs: t.Any) -> "Markup": + formatter = EscapeFormatter(self.escape) + return self.__class__(formatter.vformat(self, args, kwargs)) + + def __html_format__(self, format_spec: str) -> "Markup": + if format_spec: + raise ValueError("Unsupported format specification for Markup.") + + return self + + +class EscapeFormatter(string.Formatter): + __slots__ = ("escape",) + + def __init__(self, escape: t.Callable[[t.Any], Markup]) -> None: + self.escape = escape + super().__init__() + + def format_field(self, value: t.Any, format_spec: str) -> str: + if hasattr(value, "__html_format__"): + rv = value.__html_format__(format_spec) + elif hasattr(value, "__html__"): + if format_spec: + raise ValueError( + f"Format specifier {format_spec} given, but {type(value)} does not" + " define __html_format__. A class that defines __html__ must define" + " __html_format__ to work with format specifiers." + ) + rv = value.__html__() + else: + # We need to make sure the format spec is str here as + # otherwise the wrong callback methods are invoked. 
+ rv = string.Formatter.format_field(self, value, str(format_spec)) + return str(self.escape(rv)) + + +_ListOrDict = t.TypeVar("_ListOrDict", list, dict) + + +def _escape_argspec( + obj: _ListOrDict, iterable: t.Iterable[t.Any], escape: t.Callable[[t.Any], Markup] +) -> _ListOrDict: + """Helper for various string-wrapped functions.""" + for key, value in iterable: + if isinstance(value, str) or hasattr(value, "__html__"): + obj[key] = escape(value) + + return obj + + +class _MarkupEscapeHelper: + """Helper for :meth:`Markup.__mod__`.""" + + __slots__ = ("obj", "escape") + + def __init__(self, obj: t.Any, escape: t.Callable[[t.Any], Markup]) -> None: + self.obj = obj + self.escape = escape + + def __getitem__(self, item: t.Any) -> "_MarkupEscapeHelper": + return _MarkupEscapeHelper(self.obj[item], self.escape) + + def __str__(self) -> str: + return str(self.escape(self.obj)) + + def __repr__(self) -> str: + return str(self.escape(repr(self.obj))) + + def __int__(self) -> int: + return int(self.obj) + + def __float__(self) -> float: + return float(self.obj) + + +# circular import +try: + from ._speedups import escape as escape + from ._speedups import escape_silent as escape_silent + from ._speedups import soft_str as soft_str + from ._speedups import soft_unicode +except ImportError: + from ._native import escape as escape + from ._native import escape_silent as escape_silent # noqa: F401 + from ._native import soft_str as soft_str # noqa: F401 + from ._native import soft_unicode # noqa: F401 diff --git a/third_party/python/MarkupSafe/src/markupsafe/_native.py b/third_party/python/MarkupSafe/src/markupsafe/_native.py new file mode 100644 index 0000000000..6f7eb7a8cb --- /dev/null +++ b/third_party/python/MarkupSafe/src/markupsafe/_native.py @@ -0,0 +1,75 @@ +import typing as t + +from . import Markup + + +def escape(s: t.Any) -> Markup: + """Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in + the string with HTML-safe sequences. 
Use this if you need to display + text that might contain such characters in HTML. + + If the object has an ``__html__`` method, it is called and the + return value is assumed to already be safe for HTML. + + :param s: An object to be converted to a string and escaped. + :return: A :class:`Markup` string with the escaped text. + """ + if hasattr(s, "__html__"): + return Markup(s.__html__()) + + return Markup( + str(s) + .replace("&", "&amp;") + .replace(">", "&gt;") + .replace("<", "&lt;") + .replace("'", "&#39;") + .replace('"', "&#34;") + ) + + +def escape_silent(s: t.Optional[t.Any]) -> Markup: + """Like :func:`escape` but treats ``None`` as the empty string. + Useful with optional values, as otherwise you get the string + ``'None'`` when the value is ``None``. + + >>> escape(None) + Markup('None') + >>> escape_silent(None) + Markup('') + """ + if s is None: + return Markup() + + return escape(s) + + +def soft_str(s: t.Any) -> str: + """Convert an object to a string if it isn't already. This preserves + a :class:`Markup` string rather than converting it back to a basic + string, so it will still be marked as safe and won't be escaped + again. + + >>> value = escape("<User 1>") + >>> value + Markup('&lt;User 1&gt;') + >>> escape(str(value)) + Markup('&amp;lt;User 1&amp;gt;') + >>> escape(soft_str(value)) + Markup('&lt;User 1&gt;') + """ + if not isinstance(s, str): + return str(s) + + return s + + +def soft_unicode(s: t.Any) -> str: + import warnings + + warnings.warn( + "'soft_unicode' has been renamed to 'soft_str'. 
The old name" + " will be removed in MarkupSafe 2.1.", + DeprecationWarning, + stacklevel=2, + ) + return soft_str(s) diff --git a/third_party/python/MarkupSafe/src/markupsafe/_speedups.c b/third_party/python/MarkupSafe/src/markupsafe/_speedups.c new file mode 100644 index 0000000000..44967b1fdc --- /dev/null +++ b/third_party/python/MarkupSafe/src/markupsafe/_speedups.c @@ -0,0 +1,339 @@ +#include <Python.h> + +static PyObject* markup; + +static int +init_constants(void) +{ + PyObject *module; + + /* import markup type so that we can mark the return value */ + module = PyImport_ImportModule("markupsafe"); + if (!module) + return 0; + markup = PyObject_GetAttrString(module, "Markup"); + Py_DECREF(module); + + return 1; +} + +#define GET_DELTA(inp, inp_end, delta) \ + while (inp < inp_end) { \ + switch (*inp++) { \ + case '"': \ + case '\'': \ + case '&': \ + delta += 4; \ + break; \ + case '<': \ + case '>': \ + delta += 3; \ + break; \ + } \ + } + +#define DO_ESCAPE(inp, inp_end, outp) \ + { \ + Py_ssize_t ncopy = 0; \ + while (inp < inp_end) { \ + switch (*inp) { \ + case '"': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = '#'; \ + *outp++ = '3'; \ + *outp++ = '4'; \ + *outp++ = ';'; \ + break; \ + case '\'': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = '#'; \ + *outp++ = '3'; \ + *outp++ = '9'; \ + *outp++ = ';'; \ + break; \ + case '&': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'a'; \ + *outp++ = 'm'; \ + *outp++ = 'p'; \ + *outp++ = ';'; \ + break; \ + case '<': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'l'; \ + *outp++ = 't'; \ + *outp++ = ';'; \ + break; \ + case '>': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'g'; \ + 
*outp++ = 't'; \ + *outp++ = ';'; \ + break; \ + default: \ + ncopy++; \ + } \ + inp++; \ + } \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + } + +static PyObject* +escape_unicode_kind1(PyUnicodeObject *in) +{ + Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in); + Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS1 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, + PyUnicode_IS_ASCII(in) ? 127 : 255); + if (!out) + return NULL; + + inp = PyUnicode_1BYTE_DATA(in); + outp = PyUnicode_1BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + +static PyObject* +escape_unicode_kind2(PyUnicodeObject *in) +{ + Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in); + Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS2 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535); + if (!out) + return NULL; + + inp = PyUnicode_2BYTE_DATA(in); + outp = PyUnicode_2BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + + +static PyObject* +escape_unicode_kind4(PyUnicodeObject *in) +{ + Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in); + Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS4 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111); + if (!out) + return NULL; + + inp = PyUnicode_4BYTE_DATA(in); + outp = PyUnicode_4BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + +static PyObject* +escape_unicode(PyUnicodeObject *in) +{ + if (PyUnicode_READY(in)) + return NULL; + + switch (PyUnicode_KIND(in)) { + case PyUnicode_1BYTE_KIND: + return escape_unicode_kind1(in); + case 
PyUnicode_2BYTE_KIND: + return escape_unicode_kind2(in); + case PyUnicode_4BYTE_KIND: + return escape_unicode_kind4(in); + } + assert(0); /* shouldn't happen */ + return NULL; +} + +static PyObject* +escape(PyObject *self, PyObject *text) +{ + static PyObject *id_html; + PyObject *s = NULL, *rv = NULL, *html; + + if (id_html == NULL) { + id_html = PyUnicode_InternFromString("__html__"); + if (id_html == NULL) { + return NULL; + } + } + + /* we don't have to escape integers, bools or floats */ + if (PyLong_CheckExact(text) || + PyFloat_CheckExact(text) || PyBool_Check(text) || + text == Py_None) + return PyObject_CallFunctionObjArgs(markup, text, NULL); + + /* if the object has an __html__ method that performs the escaping */ + html = PyObject_GetAttr(text ,id_html); + if (html) { + s = PyObject_CallObject(html, NULL); + Py_DECREF(html); + if (s == NULL) { + return NULL; + } + /* Convert to Markup object */ + rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); + Py_DECREF(s); + return rv; + } + + /* otherwise make the object unicode if it isn't, then escape */ + PyErr_Clear(); + if (!PyUnicode_Check(text)) { + PyObject *unicode = PyObject_Str(text); + if (!unicode) + return NULL; + s = escape_unicode((PyUnicodeObject*)unicode); + Py_DECREF(unicode); + } + else + s = escape_unicode((PyUnicodeObject*)text); + + /* convert the unicode string into a markup object. */ + rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); + Py_DECREF(s); + return rv; +} + + +static PyObject* +escape_silent(PyObject *self, PyObject *text) +{ + if (text != Py_None) + return escape(self, text); + return PyObject_CallFunctionObjArgs(markup, NULL); +} + + +static PyObject* +soft_str(PyObject *self, PyObject *s) +{ + if (!PyUnicode_Check(s)) + return PyObject_Str(s); + Py_INCREF(s); + return s; +} + + +static PyObject* +soft_unicode(PyObject *self, PyObject *s) +{ + PyErr_WarnEx( + PyExc_DeprecationWarning, + "'soft_unicode' has been renamed to 'soft_str'. 
The old name" + " will be removed in MarkupSafe 2.1.", + 2 + ); + return soft_str(self, s); +} + + +static PyMethodDef module_methods[] = { + { + "escape", + (PyCFunction)escape, + METH_O, + "Replace the characters ``&``, ``<``, ``>``, ``'``, and ``\"`` in" + " the string with HTML-safe sequences. Use this if you need to display" + " text that might contain such characters in HTML.\n\n" + "If the object has an ``__html__`` method, it is called and the" + " return value is assumed to already be safe for HTML.\n\n" + ":param s: An object to be converted to a string and escaped.\n" + ":return: A :class:`Markup` string with the escaped text.\n" + }, + { + "escape_silent", + (PyCFunction)escape_silent, + METH_O, + "Like :func:`escape` but treats ``None`` as the empty string." + " Useful with optional values, as otherwise you get the string" + " ``'None'`` when the value is ``None``.\n\n" + ">>> escape(None)\n" + "Markup('None')\n" + ">>> escape_silent(None)\n" + "Markup('')\n" + }, + { + "soft_str", + (PyCFunction)soft_str, + METH_O, + "Convert an object to a string if it isn't already. 
This preserves" + " a :class:`Markup` string rather than converting it back to a basic" + " string, so it will still be marked as safe and won't be escaped" + " again.\n\n" + ">>> value = escape(\"<User 1>\")\n" + ">>> value\n" + "Markup('&lt;User 1&gt;')\n" + ">>> escape(str(value))\n" + "Markup('&amp;lt;User 1&amp;gt;')\n" + ">>> escape(soft_str(value))\n" + "Markup('&lt;User 1&gt;')\n" + }, + { + "soft_unicode", + (PyCFunction)soft_unicode, + METH_O, + "" + }, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static struct PyModuleDef module_definition = { + PyModuleDef_HEAD_INIT, + "markupsafe._speedups", + NULL, + -1, + module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__speedups(void) +{ + if (!init_constants()) + return NULL; + + return PyModule_Create(&module_definition); +} diff --git a/third_party/python/MarkupSafe/src/markupsafe/_speedups.pyi b/third_party/python/MarkupSafe/src/markupsafe/_speedups.pyi new file mode 100644 index 0000000000..f673240f6d --- /dev/null +++ b/third_party/python/MarkupSafe/src/markupsafe/_speedups.pyi @@ -0,0 +1,9 @@ +from typing import Any +from typing import Optional + +from . import Markup + +def escape(s: Any) -> Markup: ... +def escape_silent(s: Optional[Any]) -> Markup: ... +def soft_str(s: Any) -> str: ... +def soft_unicode(s: Any) -> str: ... diff --git a/third_party/python/MarkupSafe/src/markupsafe/py.typed b/third_party/python/MarkupSafe/src/markupsafe/py.typed new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/MarkupSafe/src/markupsafe/py.typed |