From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- testing/web-platform/tests/html/tools/README | 1 + .../tests/html/tools/html5lib_python_revision | 1 + .../tests/html/tools/html5lib_test.xml | 29 +++ .../tests/html/tools/html5lib_test_fragment.xml | 28 +++ .../tests/html/tools/html5lib_tests_revision | 1 + .../tests/html/tools/update_html5lib_tests.py | 201 +++++++++++++++++++++ 6 files changed, 261 insertions(+) create mode 100644 testing/web-platform/tests/html/tools/README create mode 100644 testing/web-platform/tests/html/tools/html5lib_python_revision create mode 100644 testing/web-platform/tests/html/tools/html5lib_test.xml create mode 100644 testing/web-platform/tests/html/tools/html5lib_test_fragment.xml create mode 100644 testing/web-platform/tests/html/tools/html5lib_tests_revision create mode 100644 testing/web-platform/tests/html/tools/update_html5lib_tests.py (limited to 'testing/web-platform/tests/html/tools') diff --git a/testing/web-platform/tests/html/tools/README b/testing/web-platform/tests/html/tools/README new file mode 100644 index 0000000000..2ebde65fcd --- /dev/null +++ b/testing/web-platform/tests/html/tools/README @@ -0,0 +1 @@ +See ../syntax/parsing/README. diff --git a/testing/web-platform/tests/html/tools/html5lib_python_revision b/testing/web-platform/tests/html/tools/html5lib_python_revision new file mode 100644 index 0000000000..e0f5dd464a --- /dev/null +++ b/testing/web-platform/tests/html/tools/html5lib_python_revision @@ -0,0 +1 @@ +f7cab6f019ce94a1ec0192b6ff29aaebaf10b50d \ No newline at end of file diff --git a/testing/web-platform/tests/html/tools/html5lib_test.xml b/testing/web-platform/tests/html/tools/html5lib_test.xml new file mode 100644 index 0000000000..8af4adce16 --- /dev/null +++ b/testing/web-platform/tests/html/tools/html5lib_test.xml @@ -0,0 +1,29 @@ + + + + HTML 5 Parser tests ${file_name} + + + + + + +

html5lib Parser Test

+
+ + + + + + + + + diff --git a/testing/web-platform/tests/html/tools/html5lib_test_fragment.xml b/testing/web-platform/tests/html/tools/html5lib_test_fragment.xml new file mode 100644 index 0000000000..794d13f1c4 --- /dev/null +++ b/testing/web-platform/tests/html/tools/html5lib_test_fragment.xml @@ -0,0 +1,28 @@ + + + + HTML 5 Parser tests ${file_name} + + + +

html5lib Parser Test

+
+ + + + + + + + diff --git a/testing/web-platform/tests/html/tools/html5lib_tests_revision b/testing/web-platform/tests/html/tools/html5lib_tests_revision new file mode 100644 index 0000000000..3ea6d572fd --- /dev/null +++ b/testing/web-platform/tests/html/tools/html5lib_tests_revision @@ -0,0 +1 @@ +a9f44960a9fedf265093d22b2aa3c7ca123727b9 diff --git a/testing/web-platform/tests/html/tools/update_html5lib_tests.py b/testing/web-platform/tests/html/tools/update_html5lib_tests.py new file mode 100644 index 0000000000..2374bb0cda --- /dev/null +++ b/testing/web-platform/tests/html/tools/update_html5lib_tests.py @@ -0,0 +1,201 @@ +import glob +import hashlib +import itertools +import json +import os +import re +import shutil +import site +import subprocess +import sys +import tempfile +import urllib +from importlib import reload + + +import genshi +from genshi.template import MarkupTemplate + + +TESTS_PATH = "html/syntax/parsing/" + +def get_paths(): + script_path = os.path.dirname(os.path.abspath(__file__)) + repo_base = get_repo_base(script_path) + tests_path = os.path.join(repo_base, TESTS_PATH) + return script_path, tests_path + + +def get_repo_base(path): + while path: + if os.path.exists(os.path.join(path, ".git")): + return path + else: + path = os.path.dirname(path) + + +def get_expected(data): + data = "#document\n" + data + return data + + +def get_hash(data, container=None): + if container == None: + container = "" + return hashlib.sha1(b"#container%s#data%s"%(container.encode("utf8"), + data.encode("utf8"))).hexdigest() + + +class Html5libInstall: + def __init__(self, rev=None, tests_rev=None): + self.html5lib_dir = None + self.rev = rev + self.tests_rev = tests_rev + + def __enter__(self): + self.html5lib_dir = tempfile.TemporaryDirectory() + html5lib_path = self.html5lib_dir.__enter__() + html5lib_python_path = os.path.join(html5lib_path, "html5lib") + html5lib_tests_path = os.path.join( + html5lib_python_path, "html5lib", "tests", "testdata" + ) + + subprocess.check_call( + [ + "git", + "clone", + "--no-checkout", + "https://github.com/html5lib/html5lib-python.git", + "html5lib", + ], + cwd=html5lib_path, + ) + + rev = self.rev if self.rev is not None else "origin/master" + subprocess.check_call( + ["git", "checkout", rev], cwd=html5lib_python_path + ) + + subprocess.check_call( + [ + "git", + "submodule", + "update", + "--init", + "--recursive", + ], + cwd=html5lib_python_path, + ) + + subprocess.check_call(["pip", "install", "-e", "html5lib"], cwd=html5lib_path) + reload(site) + + tests_rev = self.tests_rev if self.tests_rev is not None else "origin/master" + subprocess.check_call(["git", "checkout", tests_rev], cwd=html5lib_tests_path) + + def __exit__(self, *args, **kwargs): + subprocess.call(["pip", "uninstall", "-y", "html5lib"], cwd=self.html5lib_dir.name) + self.html5lib_dir.__exit__(*args, **kwargs) + self.html5lib_dir = None + + +def make_tests(script_dir, out_dir, input_file_name, test_data): + tests = [] + innerHTML_tests = [] + ids_seen = {} + print(input_file_name) + for test in test_data: + if "script-off" in test: + continue + is_innerHTML = "document-fragment" in test + data = test["data"] + container = test["document-fragment"] if is_innerHTML else None + assert test["document"], test + expected = get_expected(test["document"]) + test_list = innerHTML_tests if is_innerHTML else tests + test_id = get_hash(data, container) + if test_id in ids_seen: + print("WARNING: id %s seen multiple times in file %s this time for test (%s, %s) before for test %s, skipping"%(test_id, input_file_name, container, data, ids_seen[test_id])) + continue + ids_seen[test_id] = (container, data) + test_list.append({'string_uri_encoded_input':"\"%s\""%urllib.parse.quote(data.encode("utf8")), + 'input':data, + 'expected':expected, + 'string_escaped_expected':json.dumps(urllib.parse.quote(expected.encode("utf8"))), + 'id':test_id, + 'container':container + }) + path_normal = None + if tests: + path_normal = write_test_file(script_dir, out_dir, + tests, "html5lib_%s"%input_file_name, + "html5lib_test.xml") + path_innerHTML = None + if innerHTML_tests: + path_innerHTML = write_test_file(script_dir, out_dir, + innerHTML_tests, "html5lib_innerHTML_%s"%input_file_name, + "html5lib_test_fragment.xml") + + return path_normal, path_innerHTML + +def write_test_file(script_dir, out_dir, tests, file_name, template_file_name): + file_name = os.path.join(out_dir, file_name + ".html") + short_name = os.path.basename(file_name) + + with open(os.path.join(script_dir, template_file_name), "r") as f: + template = MarkupTemplate(f) + + stream = template.generate(file_name=short_name, tests=tests) + + with open(file_name, "w") as f: + f.write(str(stream.render('html', doctype='html5', + encoding="utf8"), "utf-8")) + return file_name + +def escape_js_string(in_data): + return in_data.encode("utf8").encode("string-escape") + +def serialize_filenames(test_filenames): + return "[" + ",\n".join("\"%s\""%item for item in test_filenames) + "]" + +def main(): + script_dir, out_dir = get_paths() + + test_files = [] + inner_html_files = [] + with open(os.path.join(script_dir, "html5lib_python_revision"), "r") as f: + html5lib_rev = f.read().strip() + + with open(os.path.join(script_dir, "html5lib_tests_revision"), "r") as f: + html5lib_tests_rev = f.read().strip() + + with Html5libInstall(html5lib_rev, html5lib_tests_rev): + from html5lib.tests import support + + if len(sys.argv) > 2: + test_iterator = zip( + itertools.repeat(False), + sorted(os.path.abspath(item) for item in + glob.glob(os.path.join(sys.argv[2], "*.dat")))) + else: + test_iterator = itertools.chain( + zip(itertools.repeat(False), + sorted(support.get_data_files("tree-construction"))), + zip(itertools.repeat(True), + sorted(support.get_data_files( + os.path.join("tree-construction", "scripted"))))) + + for (scripted, test_file) in test_iterator: + input_file_name = os.path.splitext(os.path.basename(test_file))[0] + if scripted: + input_file_name = "scripted_" + input_file_name + test_data = support.TestData(test_file) + test_filename, inner_html_file_name = make_tests(script_dir, out_dir, + input_file_name, test_data) + if test_filename is not None: + test_files.append(test_filename) + if inner_html_file_name is not None: + inner_html_files.append(inner_html_file_name) + +if __name__ == "__main__": + main() -- cgit v1.2.3