summaryrefslogtreecommitdiffstats
path: root/myst_parser/mdit_to_docutils/utils.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:23:02 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:23:02 +0000
commit943e3dc057eca53e68ddec51529bd6a1279ebd8e (patch)
tree61fb7bac619a56dfbcdcbdb7b0d4d6535fc36fe9 /myst_parser/mdit_to_docutils/utils.py
parentInitial commit. (diff)
downloadmyst-parser-943e3dc057eca53e68ddec51529bd6a1279ebd8e.tar.xz
myst-parser-943e3dc057eca53e68ddec51529bd6a1279ebd8e.zip
Adding upstream version 0.18.1.upstream/0.18.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'myst_parser/mdit_to_docutils/utils.py')
-rw-r--r--myst_parser/mdit_to_docutils/utils.py36
1 files changed, 36 insertions, 0 deletions
diff --git a/myst_parser/mdit_to_docutils/utils.py b/myst_parser/mdit_to_docutils/utils.py
new file mode 100644
index 0000000..b31d8c7
--- /dev/null
+++ b/myst_parser/mdit_to_docutils/utils.py
@@ -0,0 +1,36 @@
+import html
+from typing import Iterable, Optional
+from urllib.parse import quote, urlparse
+
+
+def escape_url(raw: str) -> str:
+ """Percent-encode, then HTML-escape, a URL to guard against code injection.
+ HTML entities in ``raw`` are decoded first; ``/#:()*?=%@+,&`` are left unquoted.
+ """
+ return html.escape(quote(html.unescape(raw), safe="/#:()*?=%@+,&"))
+
+
+def is_external_url(
+ reference: str,
+ known_url_schemes: Optional[Iterable[str]],
+ match_fragment: bool = False,
+) -> bool:
+ """Return whether a reference should be recognised as an external URL.
+
+ URLs are of the format: scheme://netloc/path;parameters?query#fragment
+
+ This checks if there is a url scheme (e.g. 'https') and, if so,
+ if the scheme is in the list of known_url_schemes (if supplied).
+
+ :param reference: the link target to test; it is parsed with ``urlparse``
+ :param known_url_schemes: e.g. ["http", "https", "mailto"]; if None, match any scheme
+ :param match_fragment: If True and a fragment found, then True will be returned,
+ irrespective of a scheme match
+ :return: True on a scheme match, or on a non-empty fragment if ``match_fragment``
+ """
+ url_check = urlparse(reference)
+ if known_url_schemes is not None:
+ scheme_known = url_check.scheme in known_url_schemes
+ else:
+ scheme_known = bool(url_check.scheme)
+ return scheme_known or (match_fragment and url_check.fragment != "")