From 12e8343068b906f8b2afddc5569968a8a91fa5b0 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 29 Apr 2024 06:24:24 +0200 Subject: Adding upstream version 2.1.0. Signed-off-by: Daniel Baumann --- markdown_it/_punycode.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 markdown_it/_punycode.py (limited to 'markdown_it/_punycode.py') diff --git a/markdown_it/_punycode.py b/markdown_it/_punycode.py new file mode 100644 index 0000000..9ad2442 --- /dev/null +++ b/markdown_it/_punycode.py @@ -0,0 +1,66 @@ +# Copyright 2014 Mathias Bynens +# Copyright 2021 Taneli Hukkinen +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import codecs +import re + +REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]") +REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]") + + +def encode(uni: str) -> str: + return codecs.encode(uni, encoding="punycode").decode() + + +def decode(ascii: str) -> str: + return codecs.decode(ascii, encoding="punycode") # type: ignore[call-overload] + + +def map_domain(string, fn): + parts = string.split("@") + result = "" + if len(parts) > 1: + # In email addresses, only the domain name should be punycoded. Leave + # the local part (i.e. everything up to `@`) intact. + result = parts[0] + "@" + string = parts[1] + labels = REGEX_SEPARATORS.split(string) + encoded = ".".join(fn(label) for label in labels) + return result + encoded + + +def to_unicode(obj: str) -> str: + def mapping(obj: str) -> str: + if obj.startswith("xn--"): + return decode(obj[4:].lower()) + return obj + + return map_domain(obj, mapping) + + +def to_ascii(obj: str) -> str: + def mapping(obj: str) -> str: + if REGEX_NON_ASCII.search(obj): + return "xn--" + encode(obj) + return obj + + return map_domain(obj, mapping) -- cgit v1.2.3