diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /testing/web-platform/tests/tools/manifest/XMLParser.py | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'testing/web-platform/tests/tools/manifest/XMLParser.py')
-rw-r--r-- | testing/web-platform/tests/tools/manifest/XMLParser.py | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/testing/web-platform/tests/tools/manifest/XMLParser.py b/testing/web-platform/tests/tools/manifest/XMLParser.py new file mode 100644 index 0000000000..689533421d --- /dev/null +++ b/testing/web-platform/tests/tools/manifest/XMLParser.py @@ -0,0 +1,151 @@ +from os.path import dirname, join + +from collections import OrderedDict + +from xml.parsers import expat +import xml.etree.ElementTree as etree # noqa: N813 + +MYPY = False +if MYPY: + # MYPY is set to True when run under Mypy. + from typing import Dict + from typing import List + from typing import Optional + from typing import Text + from typing import Union + +_catalog = join(dirname(__file__), "catalog") + +def _wrap_error(e): + # type: (expat.error) -> etree.ParseError + err = etree.ParseError(e) + err.code = e.code + err.position = e.lineno, e.offset + raise err + +_names = {} # type: Dict[Text, Text] +def _fixname(key): + # type: (Text) -> Text + try: + name = _names[key] + except KeyError: + name = key + if "}" in name: + name = "{" + name + _names[key] = name + return name + + +_undefined_entity_code = expat.errors.codes[expat.errors.XML_ERROR_UNDEFINED_ENTITY] # type: int + + +class XMLParser: + """ + An XML parser with support for XHTML DTDs and all Python-supported encodings + + This implements the API defined by + xml.etree.ElementTree.XMLParser, but supports XHTML DTDs + (therefore allowing XHTML entities) and supports all encodings + Python does, rather than just those supported by expat. + """ + def __init__(self, encoding=None): + # type: (Optional[Text]) -> None + self._parser = expat.ParserCreate(encoding, "}") + self._target = etree.TreeBuilder() + # parser settings + self._parser.buffer_text = True + self._parser.ordered_attributes = True + self._parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + # parser callbacks + self._parser.XmlDeclHandler = self._xml_decl + self._parser.StartElementHandler = self._start + self._parser.EndElementHandler = self._end + self._parser.CharacterDataHandler = self._data + self._parser.ExternalEntityRefHandler = self._external + self._parser.SkippedEntityHandler = self._skipped # type: ignore + # used for our horrible re-encoding hack + self._fed_data = [] # type: Optional[List[bytes]] + self._read_encoding = None # type: Optional[Text] + + def _xml_decl(self, version, encoding, standalone): + # type: (Text, Optional[Text], int) -> None + self._read_encoding = encoding + + def _start(self, tag, attrib_in): + # type: (Text, List[str]) -> etree.Element + assert isinstance(tag, str) + self._fed_data = None + tag = _fixname(tag) + attrib = OrderedDict() # type: Dict[Union[bytes, Text], Union[bytes, Text]] + if attrib_in: + for i in range(0, len(attrib_in), 2): + attrib[_fixname(attrib_in[i])] = attrib_in[i+1] + return self._target.start(tag, attrib) + + def _data(self, text): + # type: (Text) -> None + self._target.data(text) + + def _end(self, tag): + # type: (Text) -> etree.Element + return self._target.end(_fixname(tag)) + + def _external(self, context, base, system_id, public_id): + # type: (Text, Optional[Text], Optional[Text], Optional[Text]) -> bool + if public_id in { + "-//W3C//DTD XHTML 1.0 Transitional//EN", + "-//W3C//DTD XHTML 1.1//EN", + "-//W3C//DTD XHTML 1.0 Strict//EN", + "-//W3C//DTD XHTML 1.0 Frameset//EN", + "-//W3C//DTD XHTML Basic 1.0//EN", + "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", + "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN", + "-//W3C//DTD MathML 2.0//EN", + "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" + }: + parser = self._parser.ExternalEntityParserCreate(context) + with open(join(_catalog, "xhtml.dtd"), "rb") as fp: + try: + parser.ParseFile(fp) + except expat.error: + return False + + return True + + def _skipped(self, name, is_parameter_entity): + # type: (Text, bool) -> None + err = expat.error("undefined entity %s: line %d, column %d" % + (name, self._parser.ErrorLineNumber, + self._parser.ErrorColumnNumber)) + err.code = _undefined_entity_code + err.lineno = self._parser.ErrorLineNumber + err.offset = self._parser.ErrorColumnNumber + raise err + + def feed(self, data): + # type: (bytes) -> None + if self._fed_data is not None: + self._fed_data.append(data) + try: + self._parser.Parse(data, False) + except expat.error as v: + _wrap_error(v) + except ValueError as e: + if e.args[0] == 'multi-byte encodings are not supported': + assert self._read_encoding is not None + assert self._fed_data is not None + xml = b"".join(self._fed_data).decode(self._read_encoding).encode("utf-8") + new_parser = XMLParser("utf-8") + self._parser = new_parser._parser + self._target = new_parser._target + self._fed_data = None + self.feed(xml) + + def close(self): + # type: () -> etree.Element + try: + self._parser.Parse("", True) + except expat.error as v: + _wrap_error(v) + tree = self._target.close() + return tree |