1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
"""Convert HTML to docutils nodes."""
from __future__ import annotations
import re
from typing import TYPE_CHECKING
from docutils import nodes
from myst_parser.parsers.parse_html import Data, tokenize_html
if TYPE_CHECKING:
from .base import DocutilsRenderer
def make_error(
document: nodes.document, error_msg: str, text: str, line_number: int
) -> nodes.system_message:
return document.reporter.error(
error_msg,
nodes.literal_block(text, text),
line=line_number,
)
OPTION_KEYS_IMAGE = {"class", "alt", "height", "width", "align", "name"}
# note: docutils also has scale and target
OPTION_KEYS_ADMONITION = {"class", "name"}
# See https://github.com/micromark/micromark-extension-gfm-tagfilter
RE_FLOW = re.compile(
r"<(\/?)(iframe|noembed|noframes|plaintext|script|style|title|textarea|xmp)(?=[\t\n\f\r />])",
re.IGNORECASE,
)
def default_html(text: str, source: str, line_number: int) -> list[nodes.Element]:
raw_html = nodes.raw("", text, format="html")
raw_html.source = source
raw_html.line = line_number
return [raw_html]
def html_to_nodes(
text: str, line_number: int, renderer: DocutilsRenderer
) -> list[nodes.Element]:
"""Convert HTML to docutils nodes."""
if renderer.md_config.gfm_only:
text, _ = RE_FLOW.subn(lambda s: s.group(0).replace("<", "<"), text)
enable_html_img = "html_image" in renderer.md_config.enable_extensions
enable_html_admonition = "html_admonition" in renderer.md_config.enable_extensions
if not (enable_html_img or enable_html_admonition):
return default_html(text, renderer.document["source"], line_number)
# parse the HTML to AST
try:
root = tokenize_html(text).strip(inplace=True, recurse=False)
except Exception:
msg_node = renderer.create_warning(
"HTML could not be parsed", line=line_number, subtype="html"
)
return ([msg_node] if msg_node else []) + default_html(
text, renderer.document["source"], line_number
)
if len(root) < 1:
# if empty
return default_html(text, renderer.document["source"], line_number)
if not all(
(enable_html_img and child.name == "img")
or (
enable_html_admonition
and child.name == "div"
and "admonition" in child.attrs.classes
)
for child in root
):
return default_html(text, renderer.document["source"], line_number)
nodes_list = []
for child in root:
if child.name == "img":
if "src" not in child.attrs:
return [
renderer.reporter.error(
"<img> missing 'src' attribute", line=line_number
)
]
content = "\n".join(
f":{k}: {v}"
for k, v in sorted(child.attrs.items())
if k in OPTION_KEYS_IMAGE
)
nodes_list.extend(
renderer.run_directive(
"image", child.attrs["src"], content, line_number
)
)
else:
children = child.strip().children
if (
children
and children[0].name in ("div", "p")
and (
"title" in children[0].attrs.classes
or "admonition-title" in children[0].attrs.classes
)
):
title = "".join(child.render() for child in children.pop(0))
else:
title = "Note"
options = "\n".join(
f":{k}: {v}"
for k, v in sorted(child.attrs.items())
if k in OPTION_KEYS_ADMONITION
).rstrip()
new_children = []
for child in children:
if child.name == "p":
new_children.extend(child.children)
new_children.append(Data("\n\n"))
else:
new_children.append(child)
content = (
options
+ ("\n\n" if options else "")
+ "".join(child.render() for child in new_children).lstrip()
)
nodes_list.extend(
renderer.run_directive("admonition", title, content, line_number)
)
return nodes_list
|