from __future__ import annotations

from collections.abc import Sequence
import functools
import re

# Characters that `decode` leaves percent-encoded by default: an escape such
# as "%2F" is kept as an escape (normalised to upper-case hex) instead of being
# turned into "/".
DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"
# Empty exclusion set: passing this as `exclude` decodes every ASCII escape.
DECODE_COMPONENT_CHARS = ""

# Memoised lookup tables, keyed by the `exclude` string they were built for.
decode_cache: dict[str, list[str]] = {}

# One or more consecutive "%XX" escapes (hex digits in either case).
_PERCENT_RUN_RE = re.compile(r"(%[a-f0-9]{2})+", flags=re.IGNORECASE)

_REPLACEMENT_CHAR = "\ufffd"
_ASCII_LIMIT = 0x80


def get_decode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used to decode escapes for *exclude*.

    The table has 128 entries indexed by byte value.  Entry ``n`` is
    ``chr(n)``, except for characters listed in *exclude*, whose entry is the
    upper-case escape ``"%XX"`` so that they stay encoded.

    Characters of *exclude* outside the ASCII range are ignored: the table is
    only ever indexed with byte values below 0x80, so they could never be
    looked up (and indexing the table with them used to raise ``IndexError``).

    The returned list is shared through ``decode_cache``; callers must not
    mutate it.
    """
    cached = decode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [chr(code) for code in range(_ASCII_LIMIT)]
    for ch in exclude:
        code = ord(ch)
        if code < _ASCII_LIMIT:
            table[code] = f"%{code:02X}"

    # Register the table only once it is fully built, so a failure can never
    # leave a half-initialised entry behind.
    decode_cache[exclude] = table
    return table


def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
    """Decode a percent-encoded string.

    Every run of ``%XX`` escapes in *string* is interpreted as UTF-8 bytes and
    replaced by the decoded text.  Escapes of ASCII characters listed in
    *exclude* are kept encoded.  Bytes that do not form valid UTF-8 are
    replaced by U+FFFD.  Text that is not a well-formed escape (for example a
    lone ``%``) is returned unchanged.
    """
    cache = get_decode_cache(exclude)
    repl_func = functools.partial(repl_func_with_cache, cache=cache)
    return _PERCENT_RUN_RE.sub(repl_func, string)


def _utf8_sequence_length(lead: int) -> int:
    """Return the UTF-8 sequence length announced by *lead*, or 0 if none."""
    if (lead & 0xE0) == 0xC0:  # 110xxxxx
        return 2
    if (lead & 0xF0) == 0xE0:  # 1110xxxx
        return 3
    if (lead & 0xF8) == 0xF0:  # 11110xxx
        return 4
    return 0


def repl_func_with_cache(match: re.Match[str], cache: Sequence[str]) -> str:
    """Decode one matched run of ``%XX`` escapes.

    *match* must cover a string made only of ``%XX`` triplets.  *cache* is a
    128-entry table as returned by ``get_decode_cache``; it supplies the
    replacement for every byte below 0x80.

    Bytes from 0x80 upwards are decoded as UTF-8:

    * a lead byte followed by the announced number of continuation bytes
      (``10xxxxxx``) is decoded as a unit; if that unit is not valid UTF-8
      (overlong form, surrogate, out of range) it yields one U+FFFD per byte;
    * any other byte (stray continuation byte, truncated sequence, invalid
      lead byte) yields a single U+FFFD and decoding resumes at the next byte.
    """
    seq = match.group()
    # Each escape is exactly three characters, so the run maps 1:1 onto bytes.
    octets = [int(seq[k + 1 : k + 3], 16) for k in range(0, len(seq), 3)]
    total = len(octets)

    parts: list[str] = []
    pos = 0
    while pos < total:
        lead = octets[pos]

        if lead < _ASCII_LIMIT:
            parts.append(cache[lead])
            pos += 1
            continue

        width = _utf8_sequence_length(lead)
        if width:
            tail = octets[pos + 1 : pos + width]
            complete = len(tail) == width - 1
            if complete and all((b & 0xC0) == 0x80 for b in tail):
                try:
                    parts.append(bytes(octets[pos : pos + width]).decode())
                except UnicodeDecodeError:
                    parts.append(_REPLACEMENT_CHAR * width)
                pos += width
                continue

        parts.append(_REPLACEMENT_CHAR)
        pos += 1

    return "".join(parts)