diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:23:02 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-29 04:23:02 +0000 |
commit | 943e3dc057eca53e68ddec51529bd6a1279ebd8e (patch) | |
tree | 61fb7bac619a56dfbcdcbdb7b0d4d6535fc36fe9 /myst_parser/mdit_to_docutils/utils.py | |
parent | Initial commit. (diff) | |
download | myst-parser-943e3dc057eca53e68ddec51529bd6a1279ebd8e.tar.xz myst-parser-943e3dc057eca53e68ddec51529bd6a1279ebd8e.zip |
Adding upstream version 0.18.1.upstream/0.18.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | myst_parser/mdit_to_docutils/utils.py | 36 |
1 files changed, 36 insertions, 0 deletions
"""URL helper functions (myst_parser/mdit_to_docutils/utils.py)."""
import html
from typing import Iterable, Optional
from urllib.parse import quote, urlparse


def escape_url(raw: str) -> str:
    """Escape a URL so it is safe to embed in generated markup.

    The input is first HTML-unescaped (so already-escaped entities are not
    escaped twice), then percent-encoded while leaving the characters
    ``/#:()*?=%@+,&`` untouched, and finally HTML-escaped again.

    :param raw: the URL text as written in the source document
    :return: the percent-encoded, HTML-escaped URL
    """
    unescaped = html.unescape(raw)
    percent_encoded = quote(unescaped, safe="/#:()*?=%@+,&")
    return html.escape(percent_encoded)


def is_external_url(
    reference: str,
    known_url_schemes: Optional[Iterable[str]],
    match_fragment: bool = False,
) -> bool:
    """Return if a reference should be recognised as an external URL.

    URLs are of the format: scheme://netloc/path;parameters?query#fragment

    This checks if there is a url scheme (e.g. 'https') and, if so,
    if the scheme is in the list of known_url_schemes (if supplied).

    A reference that ``urlparse`` cannot parse at all (it raises
    ``ValueError``, e.g. for an unbalanced ``[`` in the network location)
    is never treated as an external URL.

    :param reference: the link target to inspect
    :param known_url_schemes: e.g. ["http", "https", "mailto"]
        If None, match all schemes
    :param match_fragment: If True and a fragment found, then True will be returned,
        irrespective of a scheme match
    :return: True if the reference should be treated as an external URL
    """
    try:
        url_check = urlparse(reference)
    except ValueError:
        # Malformed input must not crash the caller; text that cannot be
        # parsed as a URL is simply not an external URL.
        return False

    scheme = url_check.scheme
    if known_url_schemes is None:
        scheme_known = bool(scheme)
    else:
        # Require a scheme before the membership test: the empty string is
        # "in" every plain ``str``, so a schemeless reference would otherwise
        # match whenever a bare string is passed as the container.
        scheme_known = bool(scheme) and scheme in known_url_schemes
    return scheme_known or (match_fragment and url_check.fragment != "")