1 files changed, 514 insertions, 0 deletions
diff --git a/myst_parser/mocking.py b/myst_parser/mocking.py
new file mode 100644
index 0000000..b22475d
--- /dev/null
+++ b/myst_parser/mocking.py
@@ -0,0 +1,514 @@
+"""This module provides classes to Mock the core components of the docutils.RSTParser,
+the key difference being that nested parsing treats the text as Markdown not rST.
+"""
+from __future__ import annotations
+
+import os
+import re
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from docutils import nodes
+from docutils.parsers.rst import Directive, DirectiveError
+from docutils.parsers.rst import Parser as RSTParser
+from docutils.parsers.rst.directives.misc import Include
+from docutils.parsers.rst.states import Body, Inliner, RSTStateMachine
+from docutils.statemachine import StringList
+from docutils.utils import unescape
+
+from .parsers.directives import parse_directive_text
+
+if TYPE_CHECKING:
+    from .mdit_to_docutils.base import DocutilsRenderer
+
+
+class MockingError(Exception):
+    """An exception to signal an error during mocking of docutils components."""
+
+
+class MockInliner:
+    """A mock version of `docutils.parsers.rst.states.Inliner`.
+
+    This is parsed to role functions.
+    """
+
+    def __init__(self, renderer: DocutilsRenderer):
+        """Initialize the mock inliner."""
+        self._renderer = renderer
+        # here we mock that the `parse` method has already been called
+        # which is where these attributes are set (via the RST state Memo)
+        self.document = renderer.document
+        self.reporter = renderer.document.reporter
+        self.language = renderer.language_module_rst
+        self.parent = renderer.current_node
+
+        if not hasattr(self.reporter, "get_source_and_line"):
+            # In docutils this is set by `RSTState.runtime_init`
+            self.reporter.get_source_and_line = lambda l: (self.document["source"], l)
+
+        self.rfc_url = "rfc%d.html"
+
+    def problematic(
+        self, text: str, rawsource: str, message: nodes.system_message
+    ) -> nodes.problematic:
+        """Record a system message from parsing."""
+        msgid = self.document.set_id(message, self.parent)
+        problematic = nodes.problematic(rawsource, text, refid=msgid)
+        prbid = self.document.set_id(problematic)
+        message.add_backref(prbid)
+        return problematic
+
+    def parse(
+        self, text: str, lineno: int, memo: Any, parent: nodes.Node
+    ) -> tuple[list[nodes.Node], list[nodes.system_message]]:
+        """Parse the text and return a list of nodes."""
+        # note the only place this is normally called,
+        # is by `RSTState.inline_text`, or in directives: `self.state.inline_text`,
+        # and there the state parses its own parent
+        # self.reporter = memo.reporter
+        # self.document = memo.document
+        # self.language = memo.language
+        with self._renderer.current_node_context(parent):
+            # the parent is never actually appended to though,
+            # so we make a temporary parent to parse into
+            container = nodes.Element()
+            with self._renderer.current_node_context(container):
+                self._renderer.nested_render_text(text, lineno, inline=True)
+
+        return container.children, []
+
+    def __getattr__(self, name: str):
+        """This method is only be called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+        """
+        # TODO use document.reporter mechanism?
+        if hasattr(Inliner, name):
+            msg = "{cls} has not yet implemented attribute '{name}'".format(
+                cls=type(self).__name__, name=name
+            )
+            raise MockingError(msg).with_traceback(sys.exc_info()[2])
+        msg = f"{type(self).__name__} has no attribute {name}"
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
+
+
+class MockState:
+    """A mock version of `docutils.parsers.rst.states.RSTState`.
+
+    This is parsed to the `Directives.run()` method,
+    so that they may run nested parses on their content that will be parsed as markdown,
+    rather than RST.
+    """
+
+    def __init__(
+        self,
+        renderer: DocutilsRenderer,
+        state_machine: MockStateMachine,
+        lineno: int,
+    ):
+        self._renderer = renderer
+        self._lineno = lineno
+        self.document = renderer.document
+        self.reporter = renderer.document.reporter
+        self.state_machine = state_machine
+        self.inliner = MockInliner(renderer)
+
+        class Struct:
+            document = self.document
+            reporter = self.document.reporter
+            language = renderer.language_module_rst
+            title_styles: list[str] = []
+            section_level = max(renderer._level_to_elem)
+            section_bubble_up_kludge = False
+            inliner = self.inliner
+
+        self.memo = Struct
+
+    def parse_directive_block(
+        self,
+        content: StringList,
+        line_offset: int,
+        directive: type[Directive],
+        option_presets: dict,
+    ) -> tuple[list, dict, StringList, int]:
+        """Parse the full directive text
+
+        :returns: (arguments, options, content, content_offset)
+        """
+        if option_presets:
+            raise MockingError("parse_directive_block: option_presets not implemented")
+        # TODO should argument_str always be ""?
+        arguments, options, body_lines, content_offset = parse_directive_text(
+            directive, "", "\n".join(content)
+        )
+        return (
+            arguments,
+            options,
+            StringList(body_lines, source=content.source),
+            line_offset + content_offset,
+        )
+
+    def nested_parse(
+        self,
+        block: StringList,
+        input_offset: int,
+        node: nodes.Element,
+        match_titles: bool = False,
+        state_machine_class=None,
+        state_machine_kwargs=None,
+    ) -> None:
+        """Perform a nested parse of the input block, with ``node`` as the parent.
+
+        :param block: The block of lines to parse.
+        :param input_offset: The offset of the first line of block,
+            to the starting line of the state (i.e. directive).
+        :param node: The parent node to attach the parsed content to.
+        :param match_titles: Whether to to allow the parsing of headings
+            (normally this is false,
+            since nested heading would break the document structure)
+        """
+        sm_match_titles = self.state_machine.match_titles
+        with self._renderer.current_node_context(node):
+            self._renderer.nested_render_text(
+                "\n".join(block),
+                self._lineno + input_offset,
+                allow_headings=match_titles,
+            )
+        self.state_machine.match_titles = sm_match_titles
+
+    def parse_target(self, block, block_text, lineno: int):
+        """
+        Taken from https://github.com/docutils-mirror/docutils/blob/e88c5fb08d5cdfa8b4ac1020dd6f7177778d5990/docutils/parsers/rst/states.py#L1927  # noqa: E501
+        """
+        # Commenting out this code because it only applies to rST
+        # if block and block[-1].strip()[-1:] == "_":  # possible indirect target
+        #     reference = " ".join([line.strip() for line in block])
+        #     refname = self.is_reference(reference)
+        #     if refname:
+        #         return "refname", refname
+        reference = "".join(["".join(line.split()) for line in block])
+        return "refuri", unescape(reference)
+
+    def inline_text(
+        self, text: str, lineno: int
+    ) -> tuple[list[nodes.Element], list[nodes.Element]]:
+        """Parse text with only inline rules.
+
+        :returns: (list of nodes, list of messages)
+        """
+        return self.inliner.parse(text, lineno, self.memo, self._renderer.current_node)
+
+    # U+2014 is an em-dash:
+    attribution_pattern = re.compile("^((?:---?(?!-)|\u2014) *)(.+)")
+
+    def block_quote(self, lines: list[str], line_offset: int) -> list[nodes.Element]:
+        """Parse a block quote, which is a block of text,
+        followed by an (optional) attribution.
+
+        ::
+
+           No matter where you go, there you are.
+
+           -- Buckaroo Banzai
+        """
+        elements = []
+        # split attribution
+        last_line_blank = False
+        blockquote_lines = lines
+        attribution_lines = []
+        attribution_line_offset = None
+        # First line after a blank line must begin with a dash
+        for i, line in enumerate(lines):
+            if not line.strip():
+                last_line_blank = True
+                continue
+            if not last_line_blank:
+                last_line_blank = False
+                continue
+            last_line_blank = False
+            match = self.attribution_pattern.match(line)
+            if not match:
+                continue
+            attribution_line_offset = i
+            attribution_lines = [match.group(2)]
+            for at_line in lines[i + 1 :]:
+                indented_line = at_line[len(match.group(1)) :]
+                if len(indented_line) != len(at_line.lstrip()):
+                    break
+                attribution_lines.append(indented_line)
+            blockquote_lines = lines[:i]
+            break
+        # parse block
+        blockquote = nodes.block_quote()
+        self.nested_parse(blockquote_lines, line_offset, blockquote)
+        elements.append(blockquote)
+        # parse attribution
+        if attribution_lines:
+            attribution_text = "\n".join(attribution_lines)
+            lineno = self._lineno + line_offset + (attribution_line_offset or 0)
+            textnodes, messages = self.inline_text(attribution_text, lineno)
+            attribution = nodes.attribution(attribution_text, "", *textnodes)
+            (
+                attribution.source,
+                attribution.line,
+            ) = self.state_machine.get_source_and_line(lineno)
+            blockquote += attribution
+            elements += messages
+        return elements
+
+    def build_table(self, tabledata, tableline, stub_columns: int = 0, widths=None):
+        return Body.build_table(self, tabledata, tableline, stub_columns, widths)
+
+    def build_table_row(self, rowdata, tableline):
+        return Body.build_table_row(self, rowdata, tableline)
+
+    def __getattr__(self, name: str):
+        """This method is only be called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+        """
+        cls = type(self).__name__
+        if hasattr(Body, name):
+            msg = (
+                f"{cls} has not yet implemented attribute '{name}'. "
+                "You can parse RST directly via the `{eval-rst}` directive: "
+                "https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#how-directives-parse-content"  # noqa: E501
+            )
+        else:
+            # The requested `name` is not a docutils Body element
+            # (such as "footnote", "block_quote", "paragraph", …)
+            msg = f"{cls} has no attribute '{name}'"
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
+
+
+class MockStateMachine:
+    """A mock version of `docutils.parsers.rst.states.RSTStateMachine`.
+
+    This is parsed to the `Directives.run()` method.
+    """
+
+    def __init__(self, renderer: DocutilsRenderer, lineno: int):
+        self._renderer = renderer
+        self._lineno = lineno
+        self.document = renderer.document
+        self.language = renderer.language_module_rst
+        self.reporter = self.document.reporter
+        self.node: nodes.Element = renderer.current_node
+        self.match_titles: bool = True
+
+    def get_source(self, lineno: int | None = None):
+        """Return document source path."""
+        return self.document["source"]
+
+    def get_source_and_line(self, lineno: int | None = None):
+        """Return (source path, line) tuple for current or given line number."""
+        return self.document["source"], lineno or self._lineno
+
+    def __getattr__(self, name: str):
+        """This method is only be called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+        """
+        if hasattr(RSTStateMachine, name):
+            msg = "{cls} has not yet implemented attribute '{name}'".format(
+                cls=type(self).__name__, name=name
+            )
+            raise MockingError(msg).with_traceback(sys.exc_info()[2])
+        msg = f"{type(self).__name__} has no attribute {name}"
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
+
+
+class MockIncludeDirective:
+    """This directive uses a lot of statemachine logic that is not yet mocked.
+    Therefore, we treat it as a special case (at least for now).
+
+    See:
+    https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment
+    """
+
+    def __init__(
+        self,
+        renderer: DocutilsRenderer,
+        name: str,
+        klass: Include,
+        arguments: list,
+        options: dict,
+        body: list[str],
+        lineno: int,
+    ):
+        self.renderer = renderer
+        self.document = renderer.document
+        self.name = name
+        self.klass = klass
+        self.arguments = arguments
+        self.options = options
+        self.body = body
+        self.lineno = lineno
+
+    def run(self) -> list[nodes.Element]:
+
+        from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
+
+        if not self.document.settings.file_insertion_enabled:
+            raise DirectiveError(2, f'Directive "{self.name}" disabled.')
+
+        source_dir = Path(self.document["source"]).absolute().parent
+        include_arg = "".join([s.strip() for s in self.arguments[0].splitlines()])
+
+        if include_arg.startswith("<") and include_arg.endswith(">"):
+            # # docutils "standard" includes
+            path = Path(self.klass.standard_include_path).joinpath(include_arg[1:-1])
+        else:
+            # if using sphinx interpret absolute paths "correctly",
+            # i.e. relative to source directory
+            try:
+                sphinx_env = self.document.settings.env
+            except AttributeError:
+                pass
+            else:
+                _, include_arg = sphinx_env.relfn2path(self.arguments[0])
+                sphinx_env.note_included(include_arg)
+            path = Path(include_arg)
+        path = source_dir.joinpath(path)
+        # this ensures that the parent file is rebuilt if the included file changes
+        self.document.settings.record_dependencies.add(str(path))
+
+        # read file
+        encoding = self.options.get("encoding", self.document.settings.input_encoding)
+        error_handler = self.document.settings.input_encoding_error_handler
+        # tab_width = self.options.get("tab-width", self.document.settings.tab_width)
+        try:
+            file_content = path.read_text(encoding=encoding, errors=error_handler)
+        except Exception as error:
+            raise DirectiveError(
+                4,
+                'Directive "{}": error reading file: {}\n{}.'.format(
+                    self.name, path, error
+                ),
+            )
+
+        # get required section of text
+        startline = self.options.get("start-line", None)
+        endline = self.options.get("end-line", None)
+        file_content = "\n".join(file_content.splitlines()[startline:endline])
+        startline = startline or 0
+        for split_on_type in ["start-after", "end-before"]:
+            split_on = self.options.get(split_on_type, None)
+            if not split_on:
+                continue
+            split_index = file_content.find(split_on)
+            if split_index < 0:
+                raise DirectiveError(
+                    4,
+                    'Directive "{}"; option "{}": text not found "{}".'.format(
+                        self.name, split_on_type, split_on
+                    ),
+                )
+            if split_on_type == "start-after":
+                startline += split_index + len(split_on)
+                file_content = file_content[split_index + len(split_on) :]
+            else:
+                file_content = file_content[:split_index]
+
+        if "literal" in self.options:
+            literal_block = nodes.literal_block(
+                file_content, source=str(path), classes=self.options.get("class", [])
+            )
+            literal_block.line = 1  # TODO don;t think this should be 1?
+            self.add_name(literal_block)
+            if "number-lines" in self.options:
+                try:
+                    startline = int(self.options["number-lines"] or 1)
+                except ValueError:
+                    raise DirectiveError(
+                        3, ":number-lines: with non-integer " "start value"
+                    )
+                endline = startline + len(file_content.splitlines())
+                if file_content.endswith("\n"):
+                    file_content = file_content[:-1]
+                tokens = NumberLines([([], file_content)], startline, endline)
+                for classes, value in tokens:
+                    if classes:
+                        literal_block += nodes.inline(value, value, classes=classes)
+                    else:
+                        literal_block += nodes.Text(value)
+            else:
+                literal_block += nodes.Text(file_content)
+            return [literal_block]
+        if "code" in self.options:
+            self.options["source"] = str(path)
+            state_machine = MockStateMachine(self.renderer, self.lineno)
+            state = MockState(self.renderer, state_machine, self.lineno)
+            codeblock = CodeBlock(
+                name=self.name,
+                arguments=[self.options.pop("code")],
+                options=self.options,
+                content=file_content.splitlines(),
+                lineno=self.lineno,
+                content_offset=0,
+                block_text=file_content,
+                state=state,
+                state_machine=state_machine,
+            )
+            return codeblock.run()
+
+        # Here we perform a nested render, but temporarily setup the document/reporter
+        # with the correct document path and lineno for the included file.
+        source = self.renderer.document["source"]
+        rsource = self.renderer.reporter.source
+        line_func = getattr(self.renderer.reporter, "get_source_and_line", None)
+        try:
+            self.renderer.document["source"] = str(path)
+            self.renderer.reporter.source = str(path)
+            self.renderer.reporter.get_source_and_line = lambda l: (str(path), l)
+            if "relative-images" in self.options:
+                self.renderer.md_env["relative-images"] = os.path.relpath(
+                    path.parent, source_dir
+                )
+            if "relative-docs" in self.options:
+                self.renderer.md_env["relative-docs"] = (
+                    self.options["relative-docs"],
+                    source_dir,
+                    path.parent,
+                )
+            self.renderer.nested_render_text(
+                file_content, startline + 1, allow_headings=True
+            )
+        finally:
+            self.renderer.document["source"] = source
+            self.renderer.reporter.source = rsource
+            self.renderer.md_env.pop("relative-images", None)
+            self.renderer.md_env.pop("relative-docs", None)
+            if line_func is not None:
+                self.renderer.reporter.get_source_and_line = line_func
+            else:
+                del self.renderer.reporter.get_source_and_line
+        return []
+
+    def add_name(self, node: nodes.Element):
+        """Append self.options['name'] to node['names'] if it exists.
+
+        Also normalize the name string and register it as explicit target.
+        """
+        if "name" in self.options:
+            name = nodes.fully_normalize_name(self.options.pop("name"))
+            if "name" in node:
+                del node["name"]
+            node["names"].append(name)
+            self.renderer.document.note_explicit_target(node, node)
+
+
+class MockRSTParser(RSTParser):
+    """RSTParser which avoids a negative side effect."""
+
+    def parse(self, inputstring: str, document: nodes.document):
+        """Parse the input to populate the document AST."""
+        from docutils.parsers.rst import roles
+
+        should_restore = False
+        if "" in roles._roles:
+            should_restore = True
+            blankrole = roles._roles[""]
+
+        super().parse(inputstring, document)
+
+        if should_restore:
+            roles._roles[""] = blankrole