summaryrefslogtreecommitdiffstats
path: root/tests/test_html
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:23:02 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:23:02 +0000
commit943e3dc057eca53e68ddec51529bd6a1279ebd8e (patch)
tree61fb7bac619a56dfbcdcbdb7b0d4d6535fc36fe9 /tests/test_html
parentInitial commit. (diff)
downloadmyst-parser-upstream.tar.xz
myst-parser-upstream.zip
Adding upstream version 0.18.1.upstream/0.18.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tests/test_html')
-rw-r--r--tests/test_html/html_ast.md124
-rw-r--r--tests/test_html/html_round_trip.md87
-rw-r--r--tests/test_html/html_to_nodes.md199
-rw-r--r--tests/test_html/test_html_to_nodes.py35
-rw-r--r--tests/test_html/test_parse_html.py41
5 files changed, 486 insertions, 0 deletions
diff --git a/tests/test_html/html_ast.md b/tests/test_html/html_ast.md
new file mode 100644
index 0000000..0abbdee
--- /dev/null
+++ b/tests/test_html/html_ast.md
@@ -0,0 +1,124 @@
+tags
+.
+<html>
+<head>
+<title class="a b" other="x">Title of the document</title>
+</head>
+<body>
+The content of the document......
+</body>
+</html>
+.
+Root('')
+Tag('html')
+Data('\n')
+Tag('head')
+Data('\n')
+Tag('title', {'class': 'a b', 'other': 'x'})
+Data('Title of the docu...')
+Data('\n')
+Data('\n')
+Tag('body')
+Data('\nThe content of t...')
+Data('\n')
+Data('\n')
+.
+
+un-closed tags
+.
+<div class="a">
+<div class="b">
+.
+Root('')
+Tag('div', {'class': 'a'})
+Data('\n')
+Tag('div', {'class': 'b'})
+Data('\n')
+.
+
+xtag
+.
+<img src="img_girl.jpg" alt="Girl in a jacket" width="500" height="600"/>
+.
+Root('')
+XTag('img', {'src': 'img_girl.jpg', 'alt': 'Girl in a jacket', 'width': '500', 'height': '600'})
+Data('\n')
+.
+
+data
+.
+a
+.
+Root('')
+Data('a\n')
+.
+
+declaration
+.
+<!DOCTYPE html>
+.
+Root('')
+Declaration('DOCTYPE html')
+Data('\n')
+.
+
+process information
+.
+<?xml-stylesheet ?>
+.
+Root('')
+Pi('xml-stylesheet ?')
+Data('\n')
+.
+
+entities
+.
+&amp;
+
+&#123;
+.
+Root('')
+Entity('amp')
+Data('\n\n')
+Char('123')
+Data('\n')
+.
+
+comments
+.
+<!--This is a comment. Comments are not displayed in the browser
+-->
+.
+Root('')
+Comment('This is a comment...')
+Data('\n')
+.
+
+admonition
+.
+<div class="admonition tip alert alert-warning">
+<div class="admonition-title" style="font-weight: bold;">Tip</div>
+parameter allows to get a deterministic results even if we
+use some random process (i.e. data shuffling).
+</div>
+.
+Root('')
+Tag('div', {'class': 'admonition tip alert alert-warning'})
+Data('\n')
+Tag('div', {'class': 'admonition-title', 'style': 'font-weight: bold;'})
+Data('Tip')
+Data('\nparameter allows...')
+Data('\n')
+.
+
+image
+.
+<img src="img/fun-fish.png" alt="fishy" class="bg-primary mb-1" width="200px">
+<img src="img/fun-fish.png" alt="fishy" class="bg-primary mb-1" width="300px">
+.
+Root('')
+VoidTag('img', {'src': 'img/fun-fish.png', 'alt': 'fishy', 'class': 'bg-primary mb-1', 'width': '200px'})
+Data('\n')
+VoidTag('img', {'src': 'img/fun-fish.png', 'alt': 'fishy', 'class': 'bg-primary mb-1', 'width': '300px'})
+Data('\n')
+.
diff --git a/tests/test_html/html_round_trip.md b/tests/test_html/html_round_trip.md
new file mode 100644
index 0000000..c5ceb5c
--- /dev/null
+++ b/tests/test_html/html_round_trip.md
@@ -0,0 +1,87 @@
+tags
+.
+<html>
+<head>
+<title class="a b" other="x">Title of the document</title>
+</head>
+<body>
+The content of the document......
+</body>
+</html>
+.
+<html>
+<head>
+<title class="a b" other="x">Title of the document</title>
+</head>
+<body>
+The content of the document......
+</body>
+</html>
+.
+
+un-closed tags
+.
+<div class="a">
+<div class="b">
+.
+<div class="a">
+<div class="b">
+</div></div>
+.
+
+xtag
+.
+ <img src="img_girl.jpg" alt="Girl in a jacket" width="500" height="600"/>
+.
+ <img src="img_girl.jpg" alt="Girl in a jacket" width="500" height="600"/>
+.
+
+data
+.
+a
+.
+a
+.
+
+declaration
+.
+<!DOCTYPE html>
+.
+<!DOCTYPE html>
+.
+
+process information
+.
+<?xml-stylesheet ?>
+.
+<?xml-stylesheet ?>
+.
+
+entities
+.
+&amp;
+
+&#123;
+.
+&amp;
+
+&#123;
+.
+
+comments
+.
+<!--This is a comment. Comments are not displayed in the browser
+-->
+.
+<!--This is a comment. Comments are not displayed in the browser
+-->
+.
+
+image
+.
+<img src="img/fun-fish.png" alt="fishy" class="bg-primary mb-1" width="200px">
+<img src="img/fun-fish.png" alt="fishy" class="bg-primary mb-1" width="300px">
+.
+<img src="img/fun-fish.png" alt="fishy" class="bg-primary mb-1" width="200px">
+<img src="img/fun-fish.png" alt="fishy" class="bg-primary mb-1" width="300px">
+.
diff --git a/tests/test_html/html_to_nodes.md b/tests/test_html/html_to_nodes.md
new file mode 100644
index 0000000..e118eb9
--- /dev/null
+++ b/tests/test_html/html_to_nodes.md
@@ -0,0 +1,199 @@
+empty
+.
+
+.
+<container>
+ <raw format="html" xml:space="preserve">
+.
+
+text
+.
+abc
+.
+<container>
+ <raw format="html" xml:space="preserve">
+ abc
+.
+
+normal HTML
+.
+<div></div>
+.
+<container>
+ <raw format="html" xml:space="preserve">
+ <div></div>
+.
+
+image no src
+.
+<img>
+.
+<container>
+ <system_message>
+ <paragraph>
+ error
+.
+
+image
+.
+<img src="a">
+.
+<container>
+ <Element first="a" name="image" position="0">
+.
+
+image unknown attribute
+.
+<img src="a" other="b">
+.
+<container>
+ <Element first="a" name="image" position="0">
+.
+
+image known attributes
+.
+<img src="a" height="200px" class="a b" name="b" align="left">
+.
+<container>
+ <Element first="a" name="image" position="0">
+ :align: left
+ :class: a b
+ :height: 200px
+ :name: b
+.
+
+multiple images
+.
+<img src="a">
+<img src="b">
+.
+<container>
+ <Element first="a" name="image" position="0">
+ <Element first="b" name="image" position="0">
+.
+
+admonition no close
+.
+<div class="admonition">
+.
+<container>
+ <Element first="Note" name="admonition" position="0">
+ :class: admonition
+.
+
+admonition
+.
+<div class="admonition">
+</div>
+.
+<container>
+ <Element first="Note" name="admonition" position="0">
+ :class: admonition
+.
+
+admonition attributes
+.
+<div class="admonition tip" name="aname">
+</div>
+.
+<container>
+ <Element first="Note" name="admonition" position="0">
+ :class: admonition tip
+ :name: aname
+.
+
+admonition div-title
+.
+<div class="admonition tip">
+<div class="title">*Hallo*</div>
+.
+<container>
+ <Element first="*Hallo*" name="admonition" position="0">
+ :class: admonition tip
+.
+
+admonition p-title
+.
+<div class="admonition tip">
+<p class="title">*Hallo*</p>
+.
+<container>
+ <Element first="*Hallo*" name="admonition" position="0">
+ :class: admonition tip
+.
+
+admonition title+content
+.
+<div class="admonition">
+<div class="title">*Hallo*</div>
+content
+</div>
+.
+<container>
+ <Element first="*Hallo*" name="admonition" position="0">
+ :class: admonition
+
+ content
+.
+
+admonition multiple
+.
+<div class="admonition">
+<div class="title">first</div>
+content 1
+</div>
+<div class="admonition">
+<div class="title">second</div>
+content 2
+</div>
+.
+<container>
+ <Element first="first" name="admonition" position="0">
+ :class: admonition
+
+ content 1
+ <Element first="second" name="admonition" position="0">
+ :class: admonition
+
+ content 2
+.
+
+admonition with paragraphs
+.
+<div class="admonition">
+<p>paragraph 1</p>
+<p>paragraph 2</p>
+</div>
+.
+<container>
+ <Element first="Note" name="admonition" position="0">
+ :class: admonition
+
+ paragraph 1
+
+ paragraph 2
+.
+
+nested
+.
+<div class="admonition">
+<p>Some **content**</p>
+ <div class="admonition tip">
+ <div class="title">A *title*</div>
+ <p>Paragraph 1</p>
+ <p>Paragraph 2</p>
+ </div>
+</div>
+.
+<container>
+ <Element first="Note" name="admonition" position="0">
+ :class: admonition
+
+ Some **content**
+
+ <div class="admonition tip">
+ <div class="title">A *title*</div>
+ <p>Paragraph 1</p>
+ <p>Paragraph 2</p>
+ </div>
+.
diff --git a/tests/test_html/test_html_to_nodes.py b/tests/test_html/test_html_to_nodes.py
new file mode 100644
index 0000000..207a627
--- /dev/null
+++ b/tests/test_html/test_html_to_nodes.py
@@ -0,0 +1,35 @@
+from pathlib import Path
+from unittest.mock import Mock
+
+import pytest
+from docutils import nodes
+
+from myst_parser.config.main import MdParserConfig
+from myst_parser.mdit_to_docutils.html_to_nodes import html_to_nodes
+
+FIXTURE_PATH = Path(__file__).parent
+
+
+@pytest.fixture()
+def mock_renderer():
+ def _run_directive(name: str, first_line: str, content: str, position: int):
+ node = nodes.Element(name=name, first=first_line, position=position)
+ node += nodes.Text(content)
+ return [node]
+
+ return Mock(
+ md_config=MdParserConfig(enable_extensions=["html_image", "html_admonition"]),
+ document={"source": "source"},
+ reporter=Mock(
+ warning=Mock(return_value=nodes.system_message("warning")),
+ error=Mock(return_value=nodes.system_message("error")),
+ ),
+ run_directive=_run_directive,
+ )
+
+
+@pytest.mark.param_file(FIXTURE_PATH / "html_to_nodes.md")
+def test_html_to_nodes(file_params, mock_renderer):
+ output = nodes.container()
+ output += html_to_nodes(file_params.content, line_number=0, renderer=mock_renderer)
+ file_params.assert_expected(output.pformat(), rstrip=True)
diff --git a/tests/test_html/test_parse_html.py b/tests/test_html/test_parse_html.py
new file mode 100644
index 0000000..3b4cdc1
--- /dev/null
+++ b/tests/test_html/test_parse_html.py
@@ -0,0 +1,41 @@
+from pathlib import Path
+
+import pytest
+
+from myst_parser.parsers.parse_html import tokenize_html
+
+FIXTURE_PATH = Path(__file__).parent
+
+
+@pytest.mark.param_file(FIXTURE_PATH / "html_ast.md")
+def test_html_ast(file_params):
+ tokens = "\n".join(
+ repr(t) for t in tokenize_html(file_params.content).walk(include_self=True)
+ )
+ file_params.assert_expected(tokens, rstrip=True)
+
+
+@pytest.mark.param_file(FIXTURE_PATH / "html_round_trip.md")
+def test_html_round_trip(file_params):
+ ast = tokenize_html(file_params.content)
+ file_params.assert_expected(str(ast), rstrip=True)
+
+
+def test_render_overrides():
+ text = "<div><abc></abc></div>"
+ ast = tokenize_html(text)
+
+ def _render_abc(element, *args, **kwargs):
+ return "hallo"
+
+ output = ast.render(tag_overrides={"abc": _render_abc})
+ assert output == "<div>hallo</div>"
+
+
+def test_ast_find():
+ text = (
+ '<div class="a"><div class="c"><x/><y>z</y><div class="a b"></div></div></div>'
+ )
+ ast = tokenize_html(text)
+ found = list(ast.find("div", classes=["a"]))
+ assert [e.attrs.classes for e in found] == [["a"], ["a", "b"]]