From 943e3dc057eca53e68ddec51529bd6a1279ebd8e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 29 Apr 2024 06:23:02 +0200 Subject: Adding upstream version 0.18.1. Signed-off-by: Daniel Baumann --- myst_parser/mocking.py | 514 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 514 insertions(+) create mode 100644 myst_parser/mocking.py (limited to 'myst_parser/mocking.py') diff --git a/myst_parser/mocking.py b/myst_parser/mocking.py new file mode 100644 index 0000000..b22475d --- /dev/null +++ b/myst_parser/mocking.py @@ -0,0 +1,514 @@ +"""This module provides classes to Mock the core components of the docutils.RSTParser, +the key difference being that nested parsing treats the text as Markdown not rST. +""" +from __future__ import annotations + +import os +import re +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from docutils import nodes +from docutils.parsers.rst import Directive, DirectiveError +from docutils.parsers.rst import Parser as RSTParser +from docutils.parsers.rst.directives.misc import Include +from docutils.parsers.rst.states import Body, Inliner, RSTStateMachine +from docutils.statemachine import StringList +from docutils.utils import unescape + +from .parsers.directives import parse_directive_text + +if TYPE_CHECKING: + from .mdit_to_docutils.base import DocutilsRenderer + + +class MockingError(Exception): + """An exception to signal an error during mocking of docutils components.""" + + +class MockInliner: + """A mock version of `docutils.parsers.rst.states.Inliner`. + + This is parsed to role functions. + """ + + def __init__(self, renderer: DocutilsRenderer): + """Initialize the mock inliner.""" + self._renderer = renderer + # here we mock that the `parse` method has already been called + # which is where these attributes are set (via the RST state Memo) + self.document = renderer.document + self.reporter = renderer.document.reporter + self.language = renderer.language_module_rst + self.parent = renderer.current_node + + if not hasattr(self.reporter, "get_source_and_line"): + # In docutils this is set by `RSTState.runtime_init` + self.reporter.get_source_and_line = lambda l: (self.document["source"], l) + + self.rfc_url = "rfc%d.html" + + def problematic( + self, text: str, rawsource: str, message: nodes.system_message + ) -> nodes.problematic: + """Record a system message from parsing.""" + msgid = self.document.set_id(message, self.parent) + problematic = nodes.problematic(rawsource, text, refid=msgid) + prbid = self.document.set_id(problematic) + message.add_backref(prbid) + return problematic + + def parse( + self, text: str, lineno: int, memo: Any, parent: nodes.Node + ) -> tuple[list[nodes.Node], list[nodes.system_message]]: + """Parse the text and return a list of nodes.""" + # note the only place this is normally called, + # is by `RSTState.inline_text`, or in directives: `self.state.inline_text`, + # and there the state parses its own parent + # self.reporter = memo.reporter + # self.document = memo.document + # self.language = memo.language + with self._renderer.current_node_context(parent): + # the parent is never actually appended to though, + # so we make a temporary parent to parse into + container = nodes.Element() + with self._renderer.current_node_context(container): + self._renderer.nested_render_text(text, lineno, inline=True) + + return container.children, [] + + def __getattr__(self, name: str): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + # TODO use document.reporter mechanism? + if hasattr(Inliner, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = f"{type(self).__name__} has no attribute {name}" + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockState: + """A mock version of `docutils.parsers.rst.states.RSTState`. + + This is parsed to the `Directives.run()` method, + so that they may run nested parses on their content that will be parsed as markdown, + rather than RST. + """ + + def __init__( + self, + renderer: DocutilsRenderer, + state_machine: MockStateMachine, + lineno: int, + ): + self._renderer = renderer + self._lineno = lineno + self.document = renderer.document + self.reporter = renderer.document.reporter + self.state_machine = state_machine + self.inliner = MockInliner(renderer) + + class Struct: + document = self.document + reporter = self.document.reporter + language = renderer.language_module_rst + title_styles: list[str] = [] + section_level = max(renderer._level_to_elem) + section_bubble_up_kludge = False + inliner = self.inliner + + self.memo = Struct + + def parse_directive_block( + self, + content: StringList, + line_offset: int, + directive: type[Directive], + option_presets: dict, + ) -> tuple[list, dict, StringList, int]: + """Parse the full directive text + + :returns: (arguments, options, content, content_offset) + """ + if option_presets: + raise MockingError("parse_directive_block: option_presets not implemented") + # TODO should argument_str always be ""? + arguments, options, body_lines, content_offset = parse_directive_text( + directive, "", "\n".join(content) + ) + return ( + arguments, + options, + StringList(body_lines, source=content.source), + line_offset + content_offset, + ) + + def nested_parse( + self, + block: StringList, + input_offset: int, + node: nodes.Element, + match_titles: bool = False, + state_machine_class=None, + state_machine_kwargs=None, + ) -> None: + """Perform a nested parse of the input block, with ``node`` as the parent. + + :param block: The block of lines to parse. + :param input_offset: The offset of the first line of block, + to the starting line of the state (i.e. directive). + :param node: The parent node to attach the parsed content to. + :param match_titles: Whether to to allow the parsing of headings + (normally this is false, + since nested heading would break the document structure) + """ + sm_match_titles = self.state_machine.match_titles + with self._renderer.current_node_context(node): + self._renderer.nested_render_text( + "\n".join(block), + self._lineno + input_offset, + allow_headings=match_titles, + ) + self.state_machine.match_titles = sm_match_titles + + def parse_target(self, block, block_text, lineno: int): + """ + Taken from https://github.com/docutils-mirror/docutils/blob/e88c5fb08d5cdfa8b4ac1020dd6f7177778d5990/docutils/parsers/rst/states.py#L1927 # noqa: E501 + """ + # Commenting out this code because it only applies to rST + # if block and block[-1].strip()[-1:] == "_": # possible indirect target + # reference = " ".join([line.strip() for line in block]) + # refname = self.is_reference(reference) + # if refname: + # return "refname", refname + reference = "".join(["".join(line.split()) for line in block]) + return "refuri", unescape(reference) + + def inline_text( + self, text: str, lineno: int + ) -> tuple[list[nodes.Element], list[nodes.Element]]: + """Parse text with only inline rules. + + :returns: (list of nodes, list of messages) + """ + return self.inliner.parse(text, lineno, self.memo, self._renderer.current_node) + + # U+2014 is an em-dash: + attribution_pattern = re.compile("^((?:---?(?!-)|\u2014) *)(.+)") + + def block_quote(self, lines: list[str], line_offset: int) -> list[nodes.Element]: + """Parse a block quote, which is a block of text, + followed by an (optional) attribution. + + :: + + No matter where you go, there you are. + + -- Buckaroo Banzai + """ + elements = [] + # split attribution + last_line_blank = False + blockquote_lines = lines + attribution_lines = [] + attribution_line_offset = None + # First line after a blank line must begin with a dash + for i, line in enumerate(lines): + if not line.strip(): + last_line_blank = True + continue + if not last_line_blank: + last_line_blank = False + continue + last_line_blank = False + match = self.attribution_pattern.match(line) + if not match: + continue + attribution_line_offset = i + attribution_lines = [match.group(2)] + for at_line in lines[i + 1 :]: + indented_line = at_line[len(match.group(1)) :] + if len(indented_line) != len(at_line.lstrip()): + break + attribution_lines.append(indented_line) + blockquote_lines = lines[:i] + break + # parse block + blockquote = nodes.block_quote() + self.nested_parse(blockquote_lines, line_offset, blockquote) + elements.append(blockquote) + # parse attribution + if attribution_lines: + attribution_text = "\n".join(attribution_lines) + lineno = self._lineno + line_offset + (attribution_line_offset or 0) + textnodes, messages = self.inline_text(attribution_text, lineno) + attribution = nodes.attribution(attribution_text, "", *textnodes) + ( + attribution.source, + attribution.line, + ) = self.state_machine.get_source_and_line(lineno) + blockquote += attribution + elements += messages + return elements + + def build_table(self, tabledata, tableline, stub_columns: int = 0, widths=None): + return Body.build_table(self, tabledata, tableline, stub_columns, widths) + + def build_table_row(self, rowdata, tableline): + return Body.build_table_row(self, rowdata, tableline) + + def __getattr__(self, name: str): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + cls = type(self).__name__ + if hasattr(Body, name): + msg = ( + f"{cls} has not yet implemented attribute '{name}'. " + "You can parse RST directly via the `{eval-rst}` directive: " + "https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#how-directives-parse-content" # noqa: E501 + ) + else: + # The requested `name` is not a docutils Body element + # (such as "footnote", "block_quote", "paragraph", …) + msg = f"{cls} has no attribute '{name}'" + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockStateMachine: + """A mock version of `docutils.parsers.rst.states.RSTStateMachine`. + + This is parsed to the `Directives.run()` method. + """ + + def __init__(self, renderer: DocutilsRenderer, lineno: int): + self._renderer = renderer + self._lineno = lineno + self.document = renderer.document + self.language = renderer.language_module_rst + self.reporter = self.document.reporter + self.node: nodes.Element = renderer.current_node + self.match_titles: bool = True + + def get_source(self, lineno: int | None = None): + """Return document source path.""" + return self.document["source"] + + def get_source_and_line(self, lineno: int | None = None): + """Return (source path, line) tuple for current or given line number.""" + return self.document["source"], lineno or self._lineno + + def __getattr__(self, name: str): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + if hasattr(RSTStateMachine, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = f"{type(self).__name__} has no attribute {name}" + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockIncludeDirective: + """This directive uses a lot of statemachine logic that is not yet mocked. + Therefore, we treat it as a special case (at least for now). + + See: + https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment + """ + + def __init__( + self, + renderer: DocutilsRenderer, + name: str, + klass: Include, + arguments: list, + options: dict, + body: list[str], + lineno: int, + ): + self.renderer = renderer + self.document = renderer.document + self.name = name + self.klass = klass + self.arguments = arguments + self.options = options + self.body = body + self.lineno = lineno + + def run(self) -> list[nodes.Element]: + + from docutils.parsers.rst.directives.body import CodeBlock, NumberLines + + if not self.document.settings.file_insertion_enabled: + raise DirectiveError(2, f'Directive "{self.name}" disabled.') + + source_dir = Path(self.document["source"]).absolute().parent + include_arg = "".join([s.strip() for s in self.arguments[0].splitlines()]) + + if include_arg.startswith("<") and include_arg.endswith(">"): + # # docutils "standard" includes + path = Path(self.klass.standard_include_path).joinpath(include_arg[1:-1]) + else: + # if using sphinx interpret absolute paths "correctly", + # i.e. relative to source directory + try: + sphinx_env = self.document.settings.env + except AttributeError: + pass + else: + _, include_arg = sphinx_env.relfn2path(self.arguments[0]) + sphinx_env.note_included(include_arg) + path = Path(include_arg) + path = source_dir.joinpath(path) + # this ensures that the parent file is rebuilt if the included file changes + self.document.settings.record_dependencies.add(str(path)) + + # read file + encoding = self.options.get("encoding", self.document.settings.input_encoding) + error_handler = self.document.settings.input_encoding_error_handler + # tab_width = self.options.get("tab-width", self.document.settings.tab_width) + try: + file_content = path.read_text(encoding=encoding, errors=error_handler) + except Exception as error: + raise DirectiveError( + 4, + 'Directive "{}": error reading file: {}\n{}.'.format( + self.name, path, error + ), + ) + + # get required section of text + startline = self.options.get("start-line", None) + endline = self.options.get("end-line", None) + file_content = "\n".join(file_content.splitlines()[startline:endline]) + startline = startline or 0 + for split_on_type in ["start-after", "end-before"]: + split_on = self.options.get(split_on_type, None) + if not split_on: + continue + split_index = file_content.find(split_on) + if split_index < 0: + raise DirectiveError( + 4, + 'Directive "{}"; option "{}": text not found "{}".'.format( + self.name, split_on_type, split_on + ), + ) + if split_on_type == "start-after": + startline += split_index + len(split_on) + file_content = file_content[split_index + len(split_on) :] + else: + file_content = file_content[:split_index] + + if "literal" in self.options: + literal_block = nodes.literal_block( + file_content, source=str(path), classes=self.options.get("class", []) + ) + literal_block.line = 1 # TODO don;t think this should be 1? + self.add_name(literal_block) + if "number-lines" in self.options: + try: + startline = int(self.options["number-lines"] or 1) + except ValueError: + raise DirectiveError( + 3, ":number-lines: with non-integer " "start value" + ) + endline = startline + len(file_content.splitlines()) + if file_content.endswith("\n"): + file_content = file_content[:-1] + tokens = NumberLines([([], file_content)], startline, endline) + for classes, value in tokens: + if classes: + literal_block += nodes.inline(value, value, classes=classes) + else: + literal_block += nodes.Text(value) + else: + literal_block += nodes.Text(file_content) + return [literal_block] + if "code" in self.options: + self.options["source"] = str(path) + state_machine = MockStateMachine(self.renderer, self.lineno) + state = MockState(self.renderer, state_machine, self.lineno) + codeblock = CodeBlock( + name=self.name, + arguments=[self.options.pop("code")], + options=self.options, + content=file_content.splitlines(), + lineno=self.lineno, + content_offset=0, + block_text=file_content, + state=state, + state_machine=state_machine, + ) + return codeblock.run() + + # Here we perform a nested render, but temporarily setup the document/reporter + # with the correct document path and lineno for the included file. + source = self.renderer.document["source"] + rsource = self.renderer.reporter.source + line_func = getattr(self.renderer.reporter, "get_source_and_line", None) + try: + self.renderer.document["source"] = str(path) + self.renderer.reporter.source = str(path) + self.renderer.reporter.get_source_and_line = lambda l: (str(path), l) + if "relative-images" in self.options: + self.renderer.md_env["relative-images"] = os.path.relpath( + path.parent, source_dir + ) + if "relative-docs" in self.options: + self.renderer.md_env["relative-docs"] = ( + self.options["relative-docs"], + source_dir, + path.parent, + ) + self.renderer.nested_render_text( + file_content, startline + 1, allow_headings=True + ) + finally: + self.renderer.document["source"] = source + self.renderer.reporter.source = rsource + self.renderer.md_env.pop("relative-images", None) + self.renderer.md_env.pop("relative-docs", None) + if line_func is not None: + self.renderer.reporter.get_source_and_line = line_func + else: + del self.renderer.reporter.get_source_and_line + return [] + + def add_name(self, node: nodes.Element): + """Append self.options['name'] to node['names'] if it exists. + + Also normalize the name string and register it as explicit target. + """ + if "name" in self.options: + name = nodes.fully_normalize_name(self.options.pop("name")) + if "name" in node: + del node["name"] + node["names"].append(name) + self.renderer.document.note_explicit_target(node, node) + + +class MockRSTParser(RSTParser): + """RSTParser which avoids a negative side effect.""" + + def parse(self, inputstring: str, document: nodes.document): + """Parse the input to populate the document AST.""" + from docutils.parsers.rst import roles + + should_restore = False + if "" in roles._roles: + should_restore = True + blankrole = roles._roles[""] + + super().parse(inputstring, document) + + if should_restore: + roles._roles[""] = blankrole -- cgit v1.2.3