diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 16:10:54 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 16:10:54 +0000 |
commit | 3b2c8da6b3117ca186e27a7f94fa44b17f6a82ed (patch) | |
tree | d8d343b76ef070019c69531e4bd0f0964804e4d4 | |
parent | Initial commit. (diff) | |
download | markupsafe-3b2c8da6b3117ca186e27a7f94fa44b17f6a82ed.tar.xz markupsafe-3b2c8da6b3117ca186e27a7f94fa44b17f6a82ed.zip |
Adding upstream version 2.1.2. [upstream/2.1.2] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
36 files changed, 2165 insertions, 0 deletions
diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..81294a0 --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,142 @@ +Version 2.1.2 +------------- + +Released 2023-01-17 + +- Fix ``striptags`` not stripping tags containing newlines. + :issue:`310` + + +Version 2.1.1 +------------- + +Released 2022-03-14 + +- Avoid ambiguous regex matches in ``striptags``. :pr:`293` + + +Version 2.1.0 +------------- + +Released 2022-02-17 + +- Drop support for Python 3.6. :pr:`262` +- Remove ``soft_unicode``, which was previously deprecated. Use + ``soft_str`` instead. :pr:`261` +- Raise error on missing single placeholder during string + interpolation. :issue:`225` +- Disable speedups module for GraalPython. :issue:`277` + + +Version 2.0.1 +------------- + +Released 2021-05-18 + +- Mark top-level names as exported so type checking understands + imports in user projects. :pr:`215` +- Fix some types that weren't available in Python 3.6.0. :pr:`215` + + +Version 2.0.0 +------------- + +Released 2021-05-11 + +- Drop Python 2.7, 3.4, and 3.5 support. +- ``Markup.unescape`` uses :func:`html.unescape` to support HTML5 + character references. :pr:`117` +- Add type annotations for static typing tools. :pr:`149` + + +Version 1.1.1 +------------- + +Released 2019-02-23 + +- Fix segfault when ``__html__`` method raises an exception when using + the C speedups. The exception is now propagated correctly. :pr:`109` + + +Version 1.1.0 +------------- + +Released 2018-11-05 + +- Drop support for Python 2.6 and 3.3. +- Build wheels for Linux, Mac, and Windows, allowing systems without + a compiler to take advantage of the C extension speedups. :pr:`104` +- Use newer CPython API on Python 3, resulting in a 1.5x speedup. + :pr:`64` +- ``escape`` wraps ``__html__`` result in ``Markup``, consistent with + documented behavior. :pr:`69` + + +Version 1.0 +----------- + +Released 2017-03-07 + +- Fixed custom types not invoking ``__unicode__`` when used with + ``format()``. 
+- Added ``__version__`` module attribute. +- Improve unescape code to leave lone ampersands alone. + + +Version 0.18 +------------ + +Released 2013-05-22 + +- Fixed ``__mul__`` and string splitting on Python 3. + + +Version 0.17 +------------ + +Released 2013-05-21 + +- Fixed a bug with broken interpolation on tuples. + + +Version 0.16 +------------ + +Released 2013-05-20 + +- Improved Python 3 Support and removed 2to3. +- Removed support for Python 3.2 and 2.5. + + +Version 0.15 +------------ + +Released 2011-07-20 + +- Fixed a typo that caused the library to fail to install on pypy and + jython. + + +Version 0.14 +------------ + +Released 2011-07-20 + +- Release fix for 0.13. + + +Version 0.13 +------------ + +Released 2011-07-20 + +- Do not attempt to compile extension for PyPy or Jython. +- Work around some 64bit Windows issues. + + +Version 0.12 +------------ + +Released 2011-02-17 + +- Improved PyPy compatibility. diff --git a/LICENSE.rst b/LICENSE.rst new file mode 100644 index 0000000..9d227a0 --- /dev/null +++ b/LICENSE.rst @@ -0,0 +1,28 @@ +Copyright 2010 Pallets + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..7dfa3f6 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +include CHANGES.rst +include tox.ini +include requirements/*.txt +graft docs +prune docs/_build +graft tests +include src/markupsafe/py.typed +include src/markupsafe/*.pyi +global-exclude *.pyc diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..4a34999 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,98 @@ +Metadata-Version: 2.1 +Name: MarkupSafe +Version: 2.1.2 +Summary: Safely add untrusted strings to HTML/XML markup. 
+Home-page: https://palletsprojects.com/p/markupsafe/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: Pallets +Maintainer-email: contact@palletsprojects.com +License: BSD-3-Clause +Project-URL: Donate, https://palletsprojects.com/donate +Project-URL: Documentation, https://markupsafe.palletsprojects.com/ +Project-URL: Changes, https://markupsafe.palletsprojects.com/changes/ +Project-URL: Source Code, https://github.com/pallets/markupsafe/ +Project-URL: Issue Tracker, https://github.com/pallets/markupsafe/issues/ +Project-URL: Twitter, https://twitter.com/PalletsTeam +Project-URL: Chat, https://discord.gg/pallets +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Text Processing :: Markup :: HTML +Requires-Python: >=3.7 +Description-Content-Type: text/x-rst +License-File: LICENSE.rst + +MarkupSafe +========== + +MarkupSafe implements a text object that escapes characters so it is +safe to use in HTML and XML. Characters that have special meanings are +replaced so that they display as the actual characters. This mitigates +injection attacks, meaning untrusted user input can safely be displayed +on a page. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U MarkupSafe + +.. _pip: https://pip.pypa.io/en/stable/getting-started/ + + +Examples +-------- + +.. 
code-block:: pycon + + >>> from markupsafe import Markup, escape + + >>> # escape replaces special characters and wraps in Markup + >>> escape("<script>alert(document.cookie);</script>") + Markup('&lt;script&gt;alert(document.cookie);&lt;/script&gt;') + + >>> # wrap in Markup to mark text "safe" and prevent escaping + >>> Markup("<strong>Hello</strong>") + Markup('<strong>hello</strong>') + + >>> escape(Markup("<strong>Hello</strong>")) + Markup('<strong>hello</strong>') + + >>> # Markup is a str subclass + >>> # methods and operators escape their arguments + >>> template = Markup("Hello <em>{name}</em>") + >>> template.format(name='"World"') + Markup('Hello <em>&#34;World&#34;</em>') + + +Donate +------ + +The Pallets organization develops and supports MarkupSafe and other +popular packages. In order to grow the community of contributors and +users, and allow the maintainers to devote more time to the projects, +`please donate today`_. + +.. _please donate today: https://palletsprojects.com/donate + + +Links +----- + +- Documentation: https://markupsafe.palletsprojects.com/ +- Changes: https://markupsafe.palletsprojects.com/changes/ +- PyPI Releases: https://pypi.org/project/MarkupSafe/ +- Source Code: https://github.com/pallets/markupsafe/ +- Issue Tracker: https://github.com/pallets/markupsafe/issues/ +- Website: https://palletsprojects.com/p/markupsafe/ +- Twitter: https://twitter.com/PalletsTeam +- Chat: https://discord.gg/pallets diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..0b27612 --- /dev/null +++ b/README.rst @@ -0,0 +1,69 @@ +MarkupSafe +========== + +MarkupSafe implements a text object that escapes characters so it is +safe to use in HTML and XML. Characters that have special meanings are +replaced so that they display as the actual characters. This mitigates +injection attacks, meaning untrusted user input can safely be displayed +on a page. + + +Installing +---------- + +Install and update using `pip`_: + +.. 
code-block:: text + + pip install -U MarkupSafe + +.. _pip: https://pip.pypa.io/en/stable/getting-started/ + + +Examples +-------- + +.. code-block:: pycon + + >>> from markupsafe import Markup, escape + + >>> # escape replaces special characters and wraps in Markup + >>> escape("<script>alert(document.cookie);</script>") + Markup('&lt;script&gt;alert(document.cookie);&lt;/script&gt;') + + >>> # wrap in Markup to mark text "safe" and prevent escaping + >>> Markup("<strong>Hello</strong>") + Markup('<strong>hello</strong>') + + >>> escape(Markup("<strong>Hello</strong>")) + Markup('<strong>hello</strong>') + + >>> # Markup is a str subclass + >>> # methods and operators escape their arguments + >>> template = Markup("Hello <em>{name}</em>") + >>> template.format(name='"World"') + Markup('Hello <em>&#34;World&#34;</em>') + + +Donate +------ + +The Pallets organization develops and supports MarkupSafe and other +popular packages. In order to grow the community of contributors and +users, and allow the maintainers to devote more time to the projects, +`please donate today`_. + +.. _please donate today: https://palletsprojects.com/donate + + +Links +----- + +- Documentation: https://markupsafe.palletsprojects.com/ +- Changes: https://markupsafe.palletsprojects.com/changes/ +- PyPI Releases: https://pypi.org/project/MarkupSafe/ +- Source Code: https://github.com/pallets/markupsafe/ +- Issue Tracker: https://github.com/pallets/markupsafe/issues/ +- Website: https://palletsprojects.com/p/markupsafe/ +- Twitter: https://twitter.com/PalletsTeam +- Chat: https://discord.gg/pallets diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..5128596 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/changes.rst b/docs/changes.rst new file mode 100644 index 0000000..955deaf --- /dev/null +++ b/docs/changes.rst @@ -0,0 +1,4 @@ +Changes +======= + +.. include:: ../CHANGES.rst diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..bba34c0 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,52 @@ +from pallets_sphinx_themes import get_version +from pallets_sphinx_themes import ProjectLink + +# Project -------------------------------------------------------------- + +project = "MarkupSafe" +copyright = "2010 Pallets" +author = "Pallets" +release, version = get_version("MarkupSafe") + +# General -------------------------------------------------------------- + +master_doc = "index" +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "pallets_sphinx_themes", + "sphinxcontrib.log_cabinet", + "sphinx_issues", +] +autodoc_typehints = "description" +intersphinx_mapping = {"python": ("https://docs.python.org/3/", None)} +issues_github_path = "pallets/markupsafe" + +# HTML ----------------------------------------------------------------- + +html_theme = "jinja" +html_theme_options = {"index_sidebar_logo": False} +html_context = { + "project_links": [ + ProjectLink("Donate", "https://palletsprojects.com/donate"), + ProjectLink("PyPI Releases", "https://pypi.org/project/MarkupSafe/"), + ProjectLink("Source Code", "https://github.com/pallets/markupsafe/"), + ProjectLink("Issue Tracker", "https://github.com/pallets/markupsafe/issues/"), + ProjectLink("Website", "https://palletsprojects.com/p/markupsafe/"), + ProjectLink("Twitter", "https://twitter.com/PalletsTeam"), + ProjectLink("Chat", 
"https://discord.gg/pallets"), + ] +} +html_sidebars = { + "index": ["project.html", "localtoc.html", "searchbox.html", "ethicalads.html"], + "**": ["localtoc.html", "relations.html", "searchbox.html", "ethicalads.html"], +} +singlehtml_sidebars = {"index": ["project.html", "localtoc.html", "ethicalads.html"]} +html_title = f"MarkupSafe Documentation ({version})" +html_show_sourcelink = False + +# LaTeX ---------------------------------------------------------------- + +latex_documents = [ + (master_doc, f"MarkupSafe-{version}.tex", html_title, author, "manual") +] diff --git a/docs/escaping.rst b/docs/escaping.rst new file mode 100644 index 0000000..9e7000a --- /dev/null +++ b/docs/escaping.rst @@ -0,0 +1,21 @@ +.. module:: markupsafe + +Working With Safe Text +====================== + +.. autofunction:: escape + +.. autoclass:: Markup + :members: escape, unescape, striptags + + +Optional Values +--------------- + +.. autofunction:: escape_silent + + +Convert an Object to a String +----------------------------- + +.. autofunction:: soft_str diff --git a/docs/formatting.rst b/docs/formatting.rst new file mode 100644 index 0000000..c14f917 --- /dev/null +++ b/docs/formatting.rst @@ -0,0 +1,77 @@ +.. currentmodule:: markupsafe + +String Formatting +================= + +The :class:`Markup` class can be used as a format string. Objects +formatted into a markup string will be escaped first. + + +Format Method +------------- + +The ``format`` method extends the standard :meth:`str.format` behavior +to use an ``__html_format__`` method. + +#. If an object has an ``__html_format__`` method, it is called as a + replacement for the ``__format__`` method. It is passed a format + specifier if it's given. The method must return a string or + :class:`Markup` instance. + +#. If an object has an ``__html__`` method, it is called. If a format + specifier was passed and the class defined ``__html__`` but not + ``__html_format__``, a ``ValueError`` is raised. + +#. 
Otherwise Python's default format behavior is used and the result + is escaped. + +For example, to implement a ``User`` that wraps its ``name`` in a +``span`` tag, and adds a link when using the ``"link"`` format +specifier: + +.. code-block:: python + + class User(object): + def __init__(self, id, name): + self.id = id + self.name = name + + def __html_format__(self, format_spec): + if format_spec == "link": + return Markup( + '<a href="/user/{}">{}</a>' + ).format(self.id, self.__html__()) + elif format_spec: + raise ValueError("Invalid format spec") + return self.__html__() + + def __html__(self): + return Markup( + '<span class="user">{0}</span>' + ).format(self.name) + + +.. code-block:: pycon + + >>> user = User(3, "<script>") + >>> escape(user) + Markup('<span class="user">&lt;script&gt;</span>') + >>> Markup("<p>User: {user:link}").format(user=user) + Markup('<p>User: <a href="/user/3"><span class="user">&lt;script&gt;</span></a>') + +See Python's docs on :ref:`format string syntax <python:formatstrings>`. + + +printf-style Formatting +----------------------- + +Besides escaping, there's no special behavior involved with percent +formatting. + +.. code-block:: pycon + + >>> user = User(3, "<script>") + >>> Markup('<a href="/user/%d">%s</a>') % (user.id, user.name) + Markup('<a href="/user/3">&lt;script&gt;</a>') + +See Python's docs on :ref:`printf-style formatting <python:old-string-formatting>`. diff --git a/docs/html.rst b/docs/html.rst new file mode 100644 index 0000000..dec87af --- /dev/null +++ b/docs/html.rst @@ -0,0 +1,49 @@ +.. currentmodule:: markupsafe + +HTML Representations +==================== + +In many frameworks, if a class implements an ``__html__`` method it +will be used to get the object's representation in HTML. MarkupSafe's +:func:`escape` function and :class:`Markup` class understand and +implement this method. 
If an object has an ``__html__`` method it will +be called rather than converting the object to a string, and the result +will be assumed safe and not escaped. + +For example, an ``Image`` class might automatically generate an +``<img>`` tag: + +.. code-block:: python + + class Image: + def __init__(self, url): + self.url = url + + def __html__(self): + return f'<img src="{self.url}">' + +.. code-block:: pycon + + >>> img = Image("/static/logo.png") + >>> Markup(img) + Markup('<img src="/static/logo.png">') + +Since this bypasses escaping, you need to be careful about using +user-provided data in the output. For example, a user's display name +should still be escaped: + +.. code-block:: python + + class User: + def __init__(self, id, name): + self.id = id + self.name = name + + def __html__(self): + return f'<a href="/user/{self.id}">{escape(self.name)}</a>' + +.. code-block:: pycon + + >>> user = User(3, "<script>") + >>> escape(user) + Markup('<a href="/user/3">&lt;script&gt;</a>') diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..5c45e64 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,47 @@ +.. currentmodule:: markupsafe + +MarkupSafe +========== + +MarkupSafe escapes characters so text is safe to use in HTML and XML. +Characters that have special meanings are replaced so that they display +as the actual characters. This mitigates injection attacks, meaning +untrusted user input can safely be displayed on a page. + +The :func:`escape` function escapes text and returns a :class:`Markup` +object. The object won't be escaped anymore, but any text that is used +with it will be, ensuring that the result remains safe to use in HTML. + +>>> from markupsafe import escape +>>> hello = escape("<em>Hello</em>") +>>> hello +Markup('&lt;em&gt;Hello&lt;/em&gt;') +>>> escape(hello) +Markup('&lt;em&gt;Hello&lt;/em&gt;') +>>> hello + " <strong>World</strong>" +Markup('&lt;em&gt;Hello&lt;/em&gt; &lt;strong&gt;World&lt;/strong&gt;') + + +Installing +---------- + +Install and update using `pip`_: + +.. 
code-block:: text + + pip install -U MarkupSafe + +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + +Table of Contents +----------------- + +.. toctree:: + :maxdepth: 2 + + escaping + html + formatting + license + changes diff --git a/docs/license.rst b/docs/license.rst new file mode 100644 index 0000000..a53a98c --- /dev/null +++ b/docs/license.rst @@ -0,0 +1,4 @@ +BSD-3-Clause License +==================== + +.. include:: ../LICENSE.rst diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..7893348 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/requirements/build.txt b/requirements/build.txt new file mode 100644 index 0000000..a735b3d --- /dev/null +++ b/requirements/build.txt @@ -0,0 +1,17 @@ +# SHA1:80754af91bfb6d1073585b046fe0a474ce868509 +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +build==0.9.0 + # via -r requirements/build.in +packaging==23.0 + # via build +pep517==0.13.0 + # via build +tomli==2.0.1 + # via + # build + # pep517 diff --git a/requirements/dev.txt b/requirements/dev.txt new file mode 100644 index 0000000..bde64ee --- /dev/null +++ b/requirements/dev.txt @@ -0,0 +1,64 @@ +# SHA1:54b5b77ec8c7a0064ffa93b2fd16cb0130ba177c +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +-r docs.txt +-r tests.txt +-r typing.txt +build==0.9.0 + # via pip-tools +cachetools==5.2.0 + # via tox +cfgv==3.3.1 + # via pre-commit +chardet==5.1.0 + # via tox +click==8.1.3 + # via + # pip-compile-multi + # pip-tools +colorama==0.4.6 + # via tox +distlib==0.3.6 + # via virtualenv +filelock==3.9.0 + # via + # tox + # virtualenv +identify==2.5.11 + # via pre-commit +nodeenv==1.7.0 + # via pre-commit +pep517==0.13.0 + # via build +pip-compile-multi==2.6.1 + # via -r requirements/dev.in +pip-tools==6.12.1 + # via pip-compile-multi +platformdirs==2.6.2 + # via + # tox + # virtualenv +pre-commit==2.21.0 + # via -r requirements/dev.in +pyproject-api==1.2.1 + # via tox +pyyaml==6.0 + # via pre-commit +toposort==1.7 + # via pip-compile-multi +tox==4.1.0 + # via -r requirements/dev.in +virtualenv==20.17.1 + # via + # pre-commit + # tox +wheel==0.38.4 + # via pip-tools + +# The following packages are considered to be unsafe in a requirements file: +# pip +# 
setuptools diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100644 index 0000000..6183010 --- /dev/null +++ b/requirements/docs.txt @@ -0,0 +1,63 @@ +# SHA1:45c590f97fe95b8bdc755eef796e91adf5fbe4ea +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +alabaster==0.7.12 + # via sphinx +babel==2.11.0 + # via sphinx +certifi==2022.12.7 + # via requests +charset-normalizer==2.1.1 + # via requests +docutils==0.19 + # via sphinx +idna==3.4 + # via requests +imagesize==1.4.1 + # via sphinx +jinja2==3.1.2 + # via sphinx +markupsafe==2.1.1 + # via jinja2 +packaging==22.0 + # via + # pallets-sphinx-themes + # sphinx +pallets-sphinx-themes==2.0.3 + # via -r requirements/docs.in +pygments==2.13.0 + # via sphinx +pytz==2022.7 + # via babel +requests==2.28.1 + # via sphinx +snowballstemmer==2.2.0 + # via sphinx +sphinx==6.0.0 + # via + # -r requirements/docs.in + # pallets-sphinx-themes + # sphinx-issues + # sphinxcontrib-log-cabinet +sphinx-issues==3.0.1 + # via -r requirements/docs.in +sphinxcontrib-applehelp==1.0.2 + # via sphinx +sphinxcontrib-devhelp==1.0.2 + # via sphinx +sphinxcontrib-htmlhelp==2.0.0 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-log-cabinet==1.0.1 + # via -r requirements/docs.in +sphinxcontrib-qthelp==1.0.3 + # via sphinx +sphinxcontrib-serializinghtml==1.1.5 + # via sphinx +urllib3==1.26.13 + # via requests diff --git a/requirements/tests.txt b/requirements/tests.txt new file mode 100644 index 0000000..fa8c24a --- /dev/null +++ b/requirements/tests.txt @@ -0,0 +1,21 @@ +# SHA1:0eaa389e1fdb3a1917c0f987514bd561be5718ee +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +attrs==22.2.0 + # via pytest +exceptiongroup==1.1.0 + # via pytest +iniconfig==1.1.1 + # via pytest +packaging==22.0 + # via pytest +pluggy==1.0.0 + # via pytest +pytest==7.2.0 + # via -r requirements/tests.in +tomli==2.0.1 + # via pytest diff 
--git a/requirements/typing.txt b/requirements/typing.txt new file mode 100644 index 0000000..a70f3f5 --- /dev/null +++ b/requirements/typing.txt @@ -0,0 +1,15 @@ +# SHA1:7983aaa01d64547827c20395d77e248c41b2572f +# +# This file is autogenerated by pip-compile-multi +# To update, run: +# +# pip-compile-multi +# +mypy==0.991 + # via -r requirements/typing.in +mypy-extensions==0.4.3 + # via mypy +tomli==2.0.1 + # via mypy +typing-extensions==4.4.0 + # via mypy diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..1821e35 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,88 @@ +[metadata] +name = MarkupSafe +version = attr: markupsafe.__version__ +url = https://palletsprojects.com/p/markupsafe/ +project_urls = + Donate = https://palletsprojects.com/donate + Documentation = https://markupsafe.palletsprojects.com/ + Changes = https://markupsafe.palletsprojects.com/changes/ + Source Code = https://github.com/pallets/markupsafe/ + Issue Tracker = https://github.com/pallets/markupsafe/issues/ + Twitter = https://twitter.com/PalletsTeam + Chat = https://discord.gg/pallets +license = BSD-3-Clause +license_files = LICENSE.rst +author = Armin Ronacher +author_email = armin.ronacher@active-4.com +maintainer = Pallets +maintainer_email = contact@palletsprojects.com +description = Safely add untrusted strings to HTML/XML markup. 
+long_description = file: README.rst +long_description_content_type = text/x-rst +classifiers = + Development Status :: 5 - Production/Stable + Environment :: Web Environment + Intended Audience :: Developers + License :: OSI Approved :: BSD License + Operating System :: OS Independent + Programming Language :: Python + Topic :: Internet :: WWW/HTTP :: Dynamic Content + Topic :: Text Processing :: Markup :: HTML + +[options] +packages = find: +package_dir = = src +include_package_data = True +python_requires = >= 3.7 + +[options.packages.find] +where = src + +[tool:pytest] +testpaths = tests +filterwarnings = + error + +[coverage:run] +branch = True +source = + markupsafe + tests + +[coverage:paths] +source = + src + */site-packages + +[flake8] +select = B, E, F, W, B9, ISC +ignore = + E203 + E501 + E722 + W503 + B905 +max-line-length = 80 + +[mypy] +files = src/markupsafe +python_version = 3.7 +show_error_codes = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +no_implicit_optional = True +local_partial_types = True +no_implicit_reexport = True +strict_equality = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True +warn_return_any = True +warn_unreachable = True + +[egg_info] +tag_build = +tag_date = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d01eb74 --- /dev/null +++ b/setup.py @@ -0,0 +1,85 @@ +import os +import platform +import sys + +from distutils.errors import CCompilerError +from distutils.errors import DistutilsExecError +from distutils.errors import DistutilsPlatformError +from setuptools import Extension +from setuptools import setup +from setuptools.command.build_ext import build_ext + +ext_modules = [Extension("markupsafe._speedups", ["src/markupsafe/_speedups.c"])] + + +class BuildFailed(Exception): + pass + + +class ve_build_ext(build_ext): + """This class allows C extension building to fail.""" + + 
def run(self): + try: + build_ext.run(self) + except DistutilsPlatformError as e: + raise BuildFailed() from e + + def build_extension(self, ext): + try: + build_ext.build_extension(self, ext) + except (CCompilerError, DistutilsExecError, DistutilsPlatformError) as e: + raise BuildFailed() from e + except ValueError as e: + # this can happen on Windows 64 bit, see Python issue 7511 + if "'path'" in str(sys.exc_info()[1]): # works with Python 2 and 3 + raise BuildFailed() from e + raise + + +def run_setup(with_binary): + setup( + name="MarkupSafe", + cmdclass={"build_ext": ve_build_ext}, + ext_modules=ext_modules if with_binary else [], + ) + + +def show_message(*lines): + print("=" * 74) + for line in lines: + print(line) + print("=" * 74) + + +supports_speedups = platform.python_implementation() not in { + "PyPy", + "Jython", + "GraalVM", +} + +if os.environ.get("CIBUILDWHEEL", "0") == "1" and supports_speedups: + run_setup(True) +elif supports_speedups: + try: + run_setup(True) + except BuildFailed: + show_message( + "WARNING: The C extension could not be compiled, speedups" + " are not enabled.", + "Failure information, if any, is above.", + "Retrying the build without the C extension now.", + ) + run_setup(False) + show_message( + "WARNING: The C extension could not be compiled, speedups" + " are not enabled.", + "Plain-Python build succeeded.", + ) +else: + run_setup(False) + show_message( + "WARNING: C extensions are not supported on this Python" + " platform, speedups are not enabled.", + "Plain-Python build succeeded.", + ) diff --git a/src/MarkupSafe.egg-info/PKG-INFO b/src/MarkupSafe.egg-info/PKG-INFO new file mode 100644 index 0000000..4a34999 --- /dev/null +++ b/src/MarkupSafe.egg-info/PKG-INFO @@ -0,0 +1,98 @@ +Metadata-Version: 2.1 +Name: MarkupSafe +Version: 2.1.2 +Summary: Safely add untrusted strings to HTML/XML markup. 
+Home-page: https://palletsprojects.com/p/markupsafe/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: Pallets +Maintainer-email: contact@palletsprojects.com +License: BSD-3-Clause +Project-URL: Donate, https://palletsprojects.com/donate +Project-URL: Documentation, https://markupsafe.palletsprojects.com/ +Project-URL: Changes, https://markupsafe.palletsprojects.com/changes/ +Project-URL: Source Code, https://github.com/pallets/markupsafe/ +Project-URL: Issue Tracker, https://github.com/pallets/markupsafe/issues/ +Project-URL: Twitter, https://twitter.com/PalletsTeam +Project-URL: Chat, https://discord.gg/pallets +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Text Processing :: Markup :: HTML +Requires-Python: >=3.7 +Description-Content-Type: text/x-rst +License-File: LICENSE.rst + +MarkupSafe +========== + +MarkupSafe implements a text object that escapes characters so it is +safe to use in HTML and XML. Characters that have special meanings are +replaced so that they display as the actual characters. This mitigates +injection attacks, meaning untrusted user input can safely be displayed +on a page. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U MarkupSafe + +.. _pip: https://pip.pypa.io/en/stable/getting-started/ + + +Examples +-------- + +.. 
code-block:: pycon + + >>> from markupsafe import Markup, escape + + >>> # escape replaces special characters and wraps in Markup + >>> escape("<script>alert(document.cookie);</script>") + Markup('&lt;script&gt;alert(document.cookie);&lt;/script&gt;') + + >>> # wrap in Markup to mark text "safe" and prevent escaping + >>> Markup("<strong>Hello</strong>") + Markup('<strong>Hello</strong>') + + >>> escape(Markup("<strong>Hello</strong>")) + Markup('<strong>Hello</strong>') + + >>> # Markup is a str subclass + >>> # methods and operators escape their arguments + >>> template = Markup("Hello <em>{name}</em>") + >>> template.format(name='"World"') + Markup('Hello <em>&#34;World&#34;</em>') + + +Donate +------ + +The Pallets organization develops and supports MarkupSafe and other +popular packages. In order to grow the community of contributors and +users, and allow the maintainers to devote more time to the projects, +`please donate today`_. + +.. _please donate today: https://palletsprojects.com/donate + + +Links +----- + +- Documentation: https://markupsafe.palletsprojects.com/ +- Changes: https://markupsafe.palletsprojects.com/changes/ +- PyPI Releases: https://pypi.org/project/MarkupSafe/ +- Source Code: https://github.com/pallets/markupsafe/ +- Issue Tracker: https://github.com/pallets/markupsafe/issues/ +- Website: https://palletsprojects.com/p/markupsafe/ +- Twitter: https://twitter.com/PalletsTeam +- Chat: https://discord.gg/pallets diff --git a/src/MarkupSafe.egg-info/SOURCES.txt b/src/MarkupSafe.egg-info/SOURCES.txt new file mode 100644 index 0000000..8be9ba6 --- /dev/null +++ b/src/MarkupSafe.egg-info/SOURCES.txt @@ -0,0 +1,35 @@ +CHANGES.rst +LICENSE.rst +MANIFEST.in +README.rst +setup.cfg +setup.py +tox.ini +docs/Makefile +docs/changes.rst +docs/conf.py +docs/escaping.rst +docs/formatting.rst +docs/html.rst +docs/index.rst +docs/license.rst +docs/make.bat +requirements/build.txt +requirements/dev.txt +requirements/docs.txt +requirements/tests.txt +requirements/typing.txt 
+src/MarkupSafe.egg-info/PKG-INFO +src/MarkupSafe.egg-info/SOURCES.txt +src/MarkupSafe.egg-info/dependency_links.txt +src/MarkupSafe.egg-info/top_level.txt +src/markupsafe/__init__.py +src/markupsafe/_native.py +src/markupsafe/_speedups.c +src/markupsafe/_speedups.pyi +src/markupsafe/py.typed +tests/conftest.py +tests/test_escape.py +tests/test_exception_custom_html.py +tests/test_leak.py +tests/test_markupsafe.py
\ No newline at end of file diff --git a/src/MarkupSafe.egg-info/dependency_links.txt b/src/MarkupSafe.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/MarkupSafe.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/MarkupSafe.egg-info/top_level.txt b/src/MarkupSafe.egg-info/top_level.txt new file mode 100644 index 0000000..75bf729 --- /dev/null +++ b/src/MarkupSafe.egg-info/top_level.txt @@ -0,0 +1 @@ +markupsafe diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py new file mode 100644 index 0000000..7166b19 --- /dev/null +++ b/src/markupsafe/__init__.py @@ -0,0 +1,295 @@ +import functools +import re +import string +import typing as t + +if t.TYPE_CHECKING: + import typing_extensions as te + + class HasHTML(te.Protocol): + def __html__(self) -> str: + pass + + +__version__ = "2.1.2" + +_strip_comments_re = re.compile(r"<!--.*?-->", re.DOTALL) +_strip_tags_re = re.compile(r"<.*?>", re.DOTALL) + + +def _simple_escaping_wrapper(name: str) -> t.Callable[..., "Markup"]: + orig = getattr(str, name) + + @functools.wraps(orig) + def wrapped(self: "Markup", *args: t.Any, **kwargs: t.Any) -> "Markup": + args = _escape_argspec(list(args), enumerate(args), self.escape) # type: ignore + _escape_argspec(kwargs, kwargs.items(), self.escape) + return self.__class__(orig(self, *args, **kwargs)) + + return wrapped + + +class Markup(str): + """A string that is ready to be safely inserted into an HTML or XML + document, either because it was escaped or because it was marked + safe. + + Passing an object to the constructor converts it to text and wraps + it to mark it safe without escaping. To escape the text, use the + :meth:`escape` class method instead. 
+ + >>> Markup("Hello, <em>World</em>!") + Markup('Hello, <em>World</em>!') + >>> Markup(42) + Markup('42') + >>> Markup.escape("Hello, <em>World</em>!") + Markup('Hello <em>World</em>!') + + This implements the ``__html__()`` interface that some frameworks + use. Passing an object that implements ``__html__()`` will wrap the + output of that method, marking it safe. + + >>> class Foo: + ... def __html__(self): + ... return '<a href="/foo">foo</a>' + ... + >>> Markup(Foo()) + Markup('<a href="/foo">foo</a>') + + This is a subclass of :class:`str`. It has the same methods, but + escapes their arguments and returns a ``Markup`` instance. + + >>> Markup("<em>%s</em>") % ("foo & bar",) + Markup('<em>foo & bar</em>') + >>> Markup("<em>Hello</em> ") + "<foo>" + Markup('<em>Hello</em> <foo>') + """ + + __slots__ = () + + def __new__( + cls, base: t.Any = "", encoding: t.Optional[str] = None, errors: str = "strict" + ) -> "Markup": + if hasattr(base, "__html__"): + base = base.__html__() + + if encoding is None: + return super().__new__(cls, base) + + return super().__new__(cls, base, encoding, errors) + + def __html__(self) -> "Markup": + return self + + def __add__(self, other: t.Union[str, "HasHTML"]) -> "Markup": + if isinstance(other, str) or hasattr(other, "__html__"): + return self.__class__(super().__add__(self.escape(other))) + + return NotImplemented + + def __radd__(self, other: t.Union[str, "HasHTML"]) -> "Markup": + if isinstance(other, str) or hasattr(other, "__html__"): + return self.escape(other).__add__(self) + + return NotImplemented + + def __mul__(self, num: "te.SupportsIndex") -> "Markup": + if isinstance(num, int): + return self.__class__(super().__mul__(num)) + + return NotImplemented + + __rmul__ = __mul__ + + def __mod__(self, arg: t.Any) -> "Markup": + if isinstance(arg, tuple): + # a tuple of arguments, each wrapped + arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg) + elif hasattr(type(arg), "__getitem__") and not isinstance(arg, 
str): + # a mapping of arguments, wrapped + arg = _MarkupEscapeHelper(arg, self.escape) + else: + # a single argument, wrapped with the helper and a tuple + arg = (_MarkupEscapeHelper(arg, self.escape),) + + return self.__class__(super().__mod__(arg)) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({super().__repr__()})" + + def join(self, seq: t.Iterable[t.Union[str, "HasHTML"]]) -> "Markup": + return self.__class__(super().join(map(self.escape, seq))) + + join.__doc__ = str.join.__doc__ + + def split( # type: ignore + self, sep: t.Optional[str] = None, maxsplit: int = -1 + ) -> t.List["Markup"]: + return [self.__class__(v) for v in super().split(sep, maxsplit)] + + split.__doc__ = str.split.__doc__ + + def rsplit( # type: ignore + self, sep: t.Optional[str] = None, maxsplit: int = -1 + ) -> t.List["Markup"]: + return [self.__class__(v) for v in super().rsplit(sep, maxsplit)] + + rsplit.__doc__ = str.rsplit.__doc__ + + def splitlines(self, keepends: bool = False) -> t.List["Markup"]: # type: ignore + return [self.__class__(v) for v in super().splitlines(keepends)] + + splitlines.__doc__ = str.splitlines.__doc__ + + def unescape(self) -> str: + """Convert escaped markup back into a text string. This replaces + HTML entities with the characters they represent. + + >>> Markup("Main » <em>About</em>").unescape() + 'Main » <em>About</em>' + """ + from html import unescape + + return unescape(str(self)) + + def striptags(self) -> str: + """:meth:`unescape` the markup, remove tags, and normalize + whitespace to single spaces. + + >>> Markup("Main »\t<em>About</em>").striptags() + 'Main » About' + """ + # Use two regexes to avoid ambiguous matches. + value = _strip_comments_re.sub("", self) + value = _strip_tags_re.sub("", value) + value = " ".join(value.split()) + return Markup(value).unescape() + + @classmethod + def escape(cls, s: t.Any) -> "Markup": + """Escape a string. 
Calls :func:`escape` and ensures that for + subclasses the correct type is returned. + """ + rv = escape(s) + + if rv.__class__ is not cls: + return cls(rv) + + return rv + + for method in ( + "__getitem__", + "capitalize", + "title", + "lower", + "upper", + "replace", + "ljust", + "rjust", + "lstrip", + "rstrip", + "center", + "strip", + "translate", + "expandtabs", + "swapcase", + "zfill", + ): + locals()[method] = _simple_escaping_wrapper(method) + + del method + + def partition(self, sep: str) -> t.Tuple["Markup", "Markup", "Markup"]: + l, s, r = super().partition(self.escape(sep)) + cls = self.__class__ + return cls(l), cls(s), cls(r) + + def rpartition(self, sep: str) -> t.Tuple["Markup", "Markup", "Markup"]: + l, s, r = super().rpartition(self.escape(sep)) + cls = self.__class__ + return cls(l), cls(s), cls(r) + + def format(self, *args: t.Any, **kwargs: t.Any) -> "Markup": + formatter = EscapeFormatter(self.escape) + return self.__class__(formatter.vformat(self, args, kwargs)) + + def __html_format__(self, format_spec: str) -> "Markup": + if format_spec: + raise ValueError("Unsupported format specification for Markup.") + + return self + + +class EscapeFormatter(string.Formatter): + __slots__ = ("escape",) + + def __init__(self, escape: t.Callable[[t.Any], Markup]) -> None: + self.escape = escape + super().__init__() + + def format_field(self, value: t.Any, format_spec: str) -> str: + if hasattr(value, "__html_format__"): + rv = value.__html_format__(format_spec) + elif hasattr(value, "__html__"): + if format_spec: + raise ValueError( + f"Format specifier {format_spec} given, but {type(value)} does not" + " define __html_format__. A class that defines __html__ must define" + " __html_format__ to work with format specifiers." + ) + rv = value.__html__() + else: + # We need to make sure the format spec is str here as + # otherwise the wrong callback methods are invoked. 
+ rv = string.Formatter.format_field(self, value, str(format_spec)) + return str(self.escape(rv)) + + +_ListOrDict = t.TypeVar("_ListOrDict", list, dict) + + +def _escape_argspec( + obj: _ListOrDict, iterable: t.Iterable[t.Any], escape: t.Callable[[t.Any], Markup] +) -> _ListOrDict: + """Helper for various string-wrapped functions.""" + for key, value in iterable: + if isinstance(value, str) or hasattr(value, "__html__"): + obj[key] = escape(value) + + return obj + + +class _MarkupEscapeHelper: + """Helper for :meth:`Markup.__mod__`.""" + + __slots__ = ("obj", "escape") + + def __init__(self, obj: t.Any, escape: t.Callable[[t.Any], Markup]) -> None: + self.obj = obj + self.escape = escape + + def __getitem__(self, item: t.Any) -> "_MarkupEscapeHelper": + return _MarkupEscapeHelper(self.obj[item], self.escape) + + def __str__(self) -> str: + return str(self.escape(self.obj)) + + def __repr__(self) -> str: + return str(self.escape(repr(self.obj))) + + def __int__(self) -> int: + return int(self.obj) + + def __float__(self) -> float: + return float(self.obj) + + +# circular import +try: + from ._speedups import escape as escape + from ._speedups import escape_silent as escape_silent + from ._speedups import soft_str as soft_str +except ImportError: + from ._native import escape as escape + from ._native import escape_silent as escape_silent # noqa: F401 + from ._native import soft_str as soft_str # noqa: F401 diff --git a/src/markupsafe/_native.py b/src/markupsafe/_native.py new file mode 100644 index 0000000..8117b27 --- /dev/null +++ b/src/markupsafe/_native.py @@ -0,0 +1,63 @@ +import typing as t + +from . import Markup + + +def escape(s: t.Any) -> Markup: + """Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in + the string with HTML-safe sequences. Use this if you need to display + text that might contain such characters in HTML. + + If the object has an ``__html__`` method, it is called and the + return value is assumed to already be safe for HTML. 
+ + :param s: An object to be converted to a string and escaped. + :return: A :class:`Markup` string with the escaped text. + """ + if hasattr(s, "__html__"): + return Markup(s.__html__()) + + return Markup( + str(s) + .replace("&", "&amp;") + .replace(">", "&gt;") + .replace("<", "&lt;") + .replace("'", "&#39;") + .replace('"', "&#34;") + ) + + +def escape_silent(s: t.Optional[t.Any]) -> Markup: + """Like :func:`escape` but treats ``None`` as the empty string. + Useful with optional values, as otherwise you get the string + ``'None'`` when the value is ``None``. + + >>> escape(None) + Markup('None') + >>> escape_silent(None) + Markup('') + """ + if s is None: + return Markup() + + return escape(s) + + +def soft_str(s: t.Any) -> str: + """Convert an object to a string if it isn't already. This preserves + a :class:`Markup` string rather than converting it back to a basic + string, so it will still be marked as safe and won't be escaped + again. + + >>> value = escape("<User 1>") + >>> value + Markup('&lt;User 1&gt;') + >>> escape(str(value)) + Markup('&amp;lt;User 1&amp;gt;') + >>> escape(soft_str(value)) + Markup('&lt;User 1&gt;') + """ + if not isinstance(s, str): + return str(s) + + return s diff --git a/src/markupsafe/_speedups.c b/src/markupsafe/_speedups.c new file mode 100644 index 0000000..3c463fb --- /dev/null +++ b/src/markupsafe/_speedups.c @@ -0,0 +1,320 @@ +#include <Python.h> + +static PyObject* markup; + +static int +init_constants(void) +{ + PyObject *module; + + /* import markup type so that we can mark the return value */ + module = PyImport_ImportModule("markupsafe"); + if (!module) + return 0; + markup = PyObject_GetAttrString(module, "Markup"); + Py_DECREF(module); + + return 1; +} + +#define GET_DELTA(inp, inp_end, delta) \ + while (inp < inp_end) { \ + switch (*inp++) { \ + case '"': \ + case '\'': \ + case '&': \ + delta += 4; \ + break; \ + case '<': \ + case '>': \ + delta += 3; \ + break; \ + } \ + } + +#define DO_ESCAPE(inp, inp_end, outp) \ + { \ + Py_ssize_t ncopy = 0; \ + 
while (inp < inp_end) { \ + switch (*inp) { \ + case '"': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = '#'; \ + *outp++ = '3'; \ + *outp++ = '4'; \ + *outp++ = ';'; \ + break; \ + case '\'': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = '#'; \ + *outp++ = '3'; \ + *outp++ = '9'; \ + *outp++ = ';'; \ + break; \ + case '&': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'a'; \ + *outp++ = 'm'; \ + *outp++ = 'p'; \ + *outp++ = ';'; \ + break; \ + case '<': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'l'; \ + *outp++ = 't'; \ + *outp++ = ';'; \ + break; \ + case '>': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'g'; \ + *outp++ = 't'; \ + *outp++ = ';'; \ + break; \ + default: \ + ncopy++; \ + } \ + inp++; \ + } \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + } + +static PyObject* +escape_unicode_kind1(PyUnicodeObject *in) +{ + Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in); + Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS1 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, + PyUnicode_IS_ASCII(in) ? 
127 : 255); + if (!out) + return NULL; + + inp = PyUnicode_1BYTE_DATA(in); + outp = PyUnicode_1BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + +static PyObject* +escape_unicode_kind2(PyUnicodeObject *in) +{ + Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in); + Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS2 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535); + if (!out) + return NULL; + + inp = PyUnicode_2BYTE_DATA(in); + outp = PyUnicode_2BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + + +static PyObject* +escape_unicode_kind4(PyUnicodeObject *in) +{ + Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in); + Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS4 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111); + if (!out) + return NULL; + + inp = PyUnicode_4BYTE_DATA(in); + outp = PyUnicode_4BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + +static PyObject* +escape_unicode(PyUnicodeObject *in) +{ + if (PyUnicode_READY(in)) + return NULL; + + switch (PyUnicode_KIND(in)) { + case PyUnicode_1BYTE_KIND: + return escape_unicode_kind1(in); + case PyUnicode_2BYTE_KIND: + return escape_unicode_kind2(in); + case PyUnicode_4BYTE_KIND: + return escape_unicode_kind4(in); + } + assert(0); /* shouldn't happen */ + return NULL; +} + +static PyObject* +escape(PyObject *self, PyObject *text) +{ + static PyObject *id_html; + PyObject *s = NULL, *rv = NULL, *html; + + if (id_html == NULL) { + id_html = PyUnicode_InternFromString("__html__"); + if (id_html == NULL) { + return NULL; + } + } + + /* we don't have to escape integers, bools or floats */ + if (PyLong_CheckExact(text) || + PyFloat_CheckExact(text) || 
PyBool_Check(text) || + text == Py_None) + return PyObject_CallFunctionObjArgs(markup, text, NULL); + + /* if the object has an __html__ method that performs the escaping */ + html = PyObject_GetAttr(text ,id_html); + if (html) { + s = PyObject_CallObject(html, NULL); + Py_DECREF(html); + if (s == NULL) { + return NULL; + } + /* Convert to Markup object */ + rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); + Py_DECREF(s); + return rv; + } + + /* otherwise make the object unicode if it isn't, then escape */ + PyErr_Clear(); + if (!PyUnicode_Check(text)) { + PyObject *unicode = PyObject_Str(text); + if (!unicode) + return NULL; + s = escape_unicode((PyUnicodeObject*)unicode); + Py_DECREF(unicode); + } + else + s = escape_unicode((PyUnicodeObject*)text); + + /* convert the unicode string into a markup object. */ + rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); + Py_DECREF(s); + return rv; +} + + +static PyObject* +escape_silent(PyObject *self, PyObject *text) +{ + if (text != Py_None) + return escape(self, text); + return PyObject_CallFunctionObjArgs(markup, NULL); +} + + +static PyObject* +soft_str(PyObject *self, PyObject *s) +{ + if (!PyUnicode_Check(s)) + return PyObject_Str(s); + Py_INCREF(s); + return s; +} + + +static PyMethodDef module_methods[] = { + { + "escape", + (PyCFunction)escape, + METH_O, + "Replace the characters ``&``, ``<``, ``>``, ``'``, and ``\"`` in" + " the string with HTML-safe sequences. Use this if you need to display" + " text that might contain such characters in HTML.\n\n" + "If the object has an ``__html__`` method, it is called and the" + " return value is assumed to already be safe for HTML.\n\n" + ":param s: An object to be converted to a string and escaped.\n" + ":return: A :class:`Markup` string with the escaped text.\n" + }, + { + "escape_silent", + (PyCFunction)escape_silent, + METH_O, + "Like :func:`escape` but treats ``None`` as the empty string." 
+ " Useful with optional values, as otherwise you get the string" + " ``'None'`` when the value is ``None``.\n\n" + ">>> escape(None)\n" + "Markup('None')\n" + ">>> escape_silent(None)\n" + "Markup('')\n" + }, + { + "soft_str", + (PyCFunction)soft_str, + METH_O, + "Convert an object to a string if it isn't already. This preserves" + " a :class:`Markup` string rather than converting it back to a basic" + " string, so it will still be marked as safe and won't be escaped" + " again.\n\n" + ">>> value = escape(\"<User 1>\")\n" + ">>> value\n" + "Markup('&lt;User 1&gt;')\n" + ">>> escape(str(value))\n" + "Markup('&amp;lt;User 1&amp;gt;')\n" + ">>> escape(soft_str(value))\n" + "Markup('&lt;User 1&gt;')\n" + }, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static struct PyModuleDef module_definition = { + PyModuleDef_HEAD_INIT, + "markupsafe._speedups", + NULL, + -1, + module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__speedups(void) +{ + if (!init_constants()) + return NULL; + + return PyModule_Create(&module_definition); +} diff --git a/src/markupsafe/_speedups.pyi b/src/markupsafe/_speedups.pyi new file mode 100644 index 0000000..f673240 --- /dev/null +++ b/src/markupsafe/_speedups.pyi @@ -0,0 +1,9 @@ +from typing import Any +from typing import Optional + +from . import Markup + +def escape(s: Any) -> Markup: ... +def escape_silent(s: Optional[Any]) -> Markup: ... +def soft_str(s: Any) -> str: ... +def soft_unicode(s: Any) -> str: ... 
diff --git a/src/markupsafe/py.typed b/src/markupsafe/py.typed new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/markupsafe/py.typed diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d040ea8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,37 @@ +import pytest + +from markupsafe import _native + +try: + from markupsafe import _speedups +except ImportError: + _speedups = None # type: ignore + + +@pytest.fixture( + scope="session", + params=( + _native, + pytest.param( + _speedups, + marks=pytest.mark.skipif(_speedups is None, reason="speedups unavailable"), + ), + ), +) +def _mod(request): + return request.param + + +@pytest.fixture(scope="session") +def escape(_mod): + return _mod.escape + + +@pytest.fixture(scope="session") +def escape_silent(_mod): + return _mod.escape_silent + + +@pytest.fixture(scope="session") +def soft_str(_mod): + return _mod.soft_str diff --git a/tests/test_escape.py b/tests/test_escape.py new file mode 100644 index 0000000..bf53fac --- /dev/null +++ b/tests/test_escape.py @@ -0,0 +1,29 @@ +import pytest + +from markupsafe import Markup + + +@pytest.mark.parametrize( + ("value", "expect"), + ( + # empty + ("", ""), + # ascii + ("abcd&><'\"efgh", "abcd&><'"efgh"), + ("&><'\"efgh", "&><'"efgh"), + ("abcd&><'\"", "abcd&><'""), + # 2 byte + ("こんにちは&><'\"こんばんは", "こんにちは&><'"こんばんは"), + ("&><'\"こんばんは", "&><'"こんばんは"), + ("こんにちは&><'\"", "こんにちは&><'""), + # 4 byte + ( + "\U0001F363\U0001F362&><'\"\U0001F37A xyz", + "\U0001F363\U0001F362&><'"\U0001F37A xyz", + ), + ("&><'\"\U0001F37A xyz", "&><'"\U0001F37A xyz"), + ("\U0001F363\U0001F362&><'\"", "\U0001F363\U0001F362&><'""), + ), +) +def test_escape(escape, value, expect): + assert escape(value) == Markup(expect) diff --git a/tests/test_exception_custom_html.py b/tests/test_exception_custom_html.py new file mode 100644 index 0000000..ec2f10b --- /dev/null +++ b/tests/test_exception_custom_html.py @@ -0,0 +1,18 @@ +import pytest + + +class 
CustomHtmlThatRaises: + def __html__(self): + raise ValueError(123) + + +def test_exception_custom_html(escape): + """Checks whether exceptions in custom __html__ implementations are + propagated correctly. + + There was a bug in the native implementation at some point: + https://github.com/pallets/markupsafe/issues/108 + """ + obj = CustomHtmlThatRaises() + with pytest.raises(ValueError): + escape(obj) diff --git a/tests/test_leak.py b/tests/test_leak.py new file mode 100644 index 0000000..55b10b9 --- /dev/null +++ b/tests/test_leak.py @@ -0,0 +1,28 @@ +import gc +import platform + +import pytest + +from markupsafe import escape + + +@pytest.mark.skipif( + escape.__module__ == "markupsafe._native", + reason="only test memory leak with speedups", +) +def test_markup_leaks(): + counts = set() + + for _i in range(20): + for _j in range(1000): + escape("foo") + escape("<foo>") + escape("foo") + escape("<foo>") + + if platform.python_implementation() == "PyPy": + gc.collect() + + counts.add(len(gc.get_objects())) + + assert len(counts) == 1 diff --git a/tests/test_markupsafe.py b/tests/test_markupsafe.py new file mode 100644 index 0000000..a62ebf9 --- /dev/null +++ b/tests/test_markupsafe.py @@ -0,0 +1,196 @@ +import pytest + +from markupsafe import Markup + + +def test_adding(escape): + unsafe = '<script type="application/x-some-script">alert("foo");</script>' + safe = Markup("<em>username</em>") + assert unsafe + safe == str(escape(unsafe)) + str(safe) + + +@pytest.mark.parametrize( + ("template", "data", "expect"), + ( + ("<em>%s</em>", "<bad user>", "<em><bad user></em>"), + ( + "<em>%(username)s</em>", + {"username": "<bad user>"}, + "<em><bad user></em>", + ), + ("%i", 3.14, "3"), + ("%.2f", 3.14, "3.14"), + ), +) +def test_string_interpolation(template, data, expect): + assert Markup(template) % data == expect + + +def test_type_behavior(): + assert type(Markup("foo") + "bar") is Markup + x = Markup("foo") + assert x.__html__() is x + + +def test_html_interop(): 
+ class Foo: + def __html__(self): + return "<em>awesome</em>" + + def __str__(self): + return "awesome" + + assert Markup(Foo()) == "<em>awesome</em>" + result = Markup("<strong>%s</strong>") % Foo() + assert result == "<strong><em>awesome</em></strong>" + + +@pytest.mark.parametrize("args", ["foo", 42, ("foo", 42)]) +def test_missing_interpol(args): + with pytest.raises(TypeError): + Markup("<em></em>") % args + + +def test_tuple_interpol(): + result = Markup("<em>%s:%s</em>") % ("<foo>", "<bar>") + expect = Markup("<em><foo>:<bar></em>") + assert result == expect + + +def test_dict_interpol(): + result = Markup("<em>%(foo)s</em>") % {"foo": "<foo>"} + expect = Markup("<em><foo></em>") + assert result == expect + + result = Markup("<em>%(foo)s:%(bar)s</em>") % {"foo": "<foo>", "bar": "<bar>"} + expect = Markup("<em><foo>:<bar></em>") + assert result == expect + + +def test_escaping(escape): + assert escape("\"<>&'") == ""<>&'" + assert ( + Markup( + "<!-- outer comment -->" + "<em>Foo & Bar" + "<!-- inner comment about <em> -->" + "</em>" + "<!-- comment\nwith\nnewlines\n-->" + "<meta content='tag\nwith\nnewlines'>" + ).striptags() + == "Foo & Bar" + ) + + +def test_unescape(): + assert Markup("<test>").unescape() == "<test>" + + result = Markup("jack & tavi are cooler than mike & russ").unescape() + expect = "jack & tavi are cooler than mike & russ" + assert result == expect + + original = "&foo;" + once = Markup(original).unescape() + twice = Markup(once).unescape() + expect = "&foo;" + assert once == expect + assert twice == expect + + +def test_format(): + result = Markup("<em>{awesome}</em>").format(awesome="<awesome>") + assert result == "<em><awesome></em>" + + result = Markup("{0[1][bar]}").format([0, {"bar": "<bar/>"}]) + assert result == "<bar/>" + + result = Markup("{0[1][bar]}").format([0, {"bar": Markup("<bar/>")}]) + assert result == "<bar/>" + + +def test_formatting_empty(): + formatted = Markup("{}").format(0) + assert formatted == Markup("0") + + 
+def test_custom_formatting(): + class HasHTMLOnly: + def __html__(self): + return Markup("<foo>") + + class HasHTMLAndFormat: + def __html__(self): + return Markup("<foo>") + + def __html_format__(self, spec): + return Markup("<FORMAT>") + + assert Markup("{0}").format(HasHTMLOnly()) == Markup("<foo>") + assert Markup("{0}").format(HasHTMLAndFormat()) == Markup("<FORMAT>") + + +def test_complex_custom_formatting(): + class User: + def __init__(self, id, username): + self.id = id + self.username = username + + def __html_format__(self, format_spec): + if format_spec == "link": + return Markup('<a href="/user/{0}">{1}</a>').format( + self.id, self.__html__() + ) + elif format_spec: + raise ValueError("Invalid format spec") + + return self.__html__() + + def __html__(self): + return Markup("<span class=user>{0}</span>").format(self.username) + + user = User(1, "foo") + result = Markup("<p>User: {0:link}").format(user) + expect = Markup('<p>User: <a href="/user/1"><span class=user>foo</span></a>') + assert result == expect + + +def test_formatting_with_objects(): + class Stringable: + def __str__(self): + return "строка" + + assert Markup("{s}").format(s=Stringable()) == Markup("строка") + + +def test_escape_silent(escape, escape_silent): + assert escape_silent(None) == Markup() + assert escape(None) == Markup(None) + assert escape_silent("<foo>") == Markup("<foo>") + + +def test_splitting(): + expect = [Markup("a"), Markup("b")] + assert Markup("a b").split() == expect + assert Markup("a b").rsplit() == expect + assert Markup("a\nb").splitlines() == expect + + +def test_mul(): + assert Markup("a") * 3 == Markup("aaa") + + +def test_escape_return_type(escape): + assert isinstance(escape("a"), Markup) + assert isinstance(escape(Markup("a")), Markup) + + class Foo: + def __html__(self): + return "<strong>Foo</strong>" + + assert isinstance(escape(Foo()), Markup) + + +def test_soft_str(soft_str): + assert type(soft_str("")) is str + assert type(soft_str(Markup())) is 
Markup + assert type(soft_str(15)) is str @@ -0,0 +1,28 @@ +[tox] +envlist = + py3{12,11,10,9,8,7} + pypy39 + style + typing + docs +skip_missing_interpreters = true + +[testenv] +package = wheel +deps = -r requirements/tests.txt +commands = pytest -v --tb=short --basetemp={envtmpdir} {posargs} + +[testenv:style] +deps = pre-commit +skip_install = true +commands = pre-commit run --all-files + +[testenv:typing] +package = wheel +deps = -r requirements/typing.txt +commands = mypy + +[testenv:docs] +package = wheel +deps = -r requirements/docs.txt +commands = sphinx-build -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html |