5 files changed, 597 insertions, 0 deletions
diff --git a/myst_parser/sphinx_ext/__init__.py b/myst_parser/sphinx_ext/__init__.py
new file mode 100644
index 0000000..1bfeb71
--- /dev/null
+++ b/myst_parser/sphinx_ext/__init__.py
@@ -0,0 +1 @@
+"""Sphinx extension for myst_parser."""
diff --git a/myst_parser/sphinx_ext/directives.py b/myst_parser/sphinx_ext/directives.py
new file mode 100644
index 0000000..39ca2c6
--- /dev/null
+++ b/myst_parser/sphinx_ext/directives.py
@@ -0,0 +1,136 @@
+"""MyST specific directives"""
+from copy import copy
+from typing import List, Tuple, cast
+
+from docutils import nodes
+from docutils.parsers.rst import directives
+from sphinx.directives import SphinxDirective
+from sphinx.util.docutils import SphinxRole
+
+from myst_parser.mocking import MockState
+
+
+def align(argument):  # ``align`` option converter; validation is delegated to directives.choice
+    return directives.choice(argument, ("left", "center", "right"))
+
+
+def figwidth_value(argument):
+    """Convert the ``width`` option: "image" (any case) or a docutils length value."""
+    if argument.lower() != "image":
+        return directives.length_or_percentage_or_unitless(argument, "px")
+    return "image"
+
+
+class SubstitutionReferenceRole(SphinxRole):
+    """Role that emits a ``substitution_reference`` node for its text.
+
+    Note, in ``docutils/parsers/rst/roles.py`` this is left unimplemented.
+    """
+
+    def run(self) -> Tuple[List[nodes.Node], List[nodes.system_message]]:
+        ref = nodes.substitution_reference(self.rawtext, self.text)
+        ref["refname"] = nodes.fully_normalize_name(self.text)
+        self.set_source_info(ref, self.lineno)
+        return [ref], []
+
+
+class FigureMarkdown(SphinxDirective):
+    """Directive for creating a figure with Markdown compatible syntax.
+
+    Example::
+
+        :::{figure-md} target
+        <img src="img/fun-fish.png" alt="fishy" class="bg-primary mb-1" width="200px">
+
+        This is a caption in **Markdown**
+        :::
+
+    """
+
+    required_arguments = 0
+    optional_arguments = 1  # reference name for the figure (overrides the ``name`` option)
+    final_argument_whitespace = True
+    has_content = True
+
+    option_spec = {
+        "width": figwidth_value,
+        "class": directives.class_option,
+        "align": align,
+        "name": directives.unchanged,
+    }
+
+    def run(self) -> List[nodes.Node]:
+        """Parse the content as one image plus one caption paragraph; build a figure."""
+        figwidth = self.options.pop("width", None)
+        figclasses = self.options.pop("class", None)
+        align = self.options.pop("align", None)
+        if not isinstance(self.state, MockState):
+            return [self.figure_error("Directive is only supported in myst parser")]
+        state = cast(MockState, self.state)
+
+        # ensure html image enabled (the shared config is restored in ``finally``)
+        myst_extensions = copy(state._renderer.md_config.enable_extensions)
+        node = nodes.Element()
+        try:
+            state._renderer.md_config.enable_extensions = list(
+                state._renderer.md_config.enable_extensions
+            ) + ["html_image"]
+            state.nested_parse(self.content, self.content_offset, node)
+        finally:
+            state._renderer.md_config.enable_extensions = myst_extensions
+
+        if len(node.children) != 2:
+            return [
+                self.figure_error(
+                    "content should be one image, "
+                    "followed by a single paragraph caption"
+                )
+            ]
+
+        image_node, caption_para = node.children
+        if isinstance(image_node, nodes.paragraph) and len(image_node):
+            image_node = image_node[0]  # the image may be wrapped in a paragraph
+
+        if not isinstance(image_node, nodes.image):
+            return [
+                self.figure_error(
+                    "content should be one image (not found), "
+                    "followed by single paragraph caption"
+                )
+            ]
+
+        if not isinstance(caption_para, nodes.paragraph):
+            return [
+                self.figure_error(
+                    "content should be one image, "
+                    "followed by single paragraph caption (not found)"
+                )
+            ]
+
+        caption_node = nodes.caption(caption_para.rawsource, "", *caption_para.children)
+        caption_node.source = caption_para.source
+        caption_node.line = caption_para.line
+
+        figure_node = nodes.figure("", image_node, caption_node)
+        self.set_source_info(figure_node)
+
+        if figwidth is not None:
+            figure_node["width"] = figwidth
+        if figclasses:
+            figure_node["classes"] += figclasses
+        if align:
+            figure_node["align"] = align
+        if self.arguments:
+            self.options["name"] = self.arguments[0]
+        # called unconditionally, so a ``name`` option also works without an argument
+        self.add_name(figure_node)
+        return [figure_node]
+
+    def figure_error(self, message):
+        """Build an error-level system message for an invalid figure."""
+        error = self.state_machine.reporter.error(
+            message,
+            nodes.literal_block(self.block_text, self.block_text),
+            line=self.lineno,
+        )
+        return error
diff --git a/myst_parser/sphinx_ext/main.py b/myst_parser/sphinx_ext/main.py
new file mode 100644
index 0000000..f5aeffc
--- /dev/null
+++ b/myst_parser/sphinx_ext/main.py
@@ -0,0 +1,60 @@
+"""The setup for the sphinx extension."""
+from typing import Any
+
+from sphinx.application import Sphinx
+
+
+def setup_sphinx(app: Sphinx, load_parser: bool = False) -> None:
+    """Initialize all settings and transforms in Sphinx."""
+    # we do this separately to setup,
+    # so that it can be called by external packages like myst_nb
+    from myst_parser.config.main import MdParserConfig
+    from myst_parser.parsers.sphinx_ import MystParser
+    from myst_parser.sphinx_ext.directives import (
+        FigureMarkdown,
+        SubstitutionReferenceRole,
+    )
+    from myst_parser.sphinx_ext.mathjax import override_mathjax
+    from myst_parser.sphinx_ext.myst_refs import MystReferenceResolver
+    # register the ``.md`` suffix and the MyST parser only when requested
+    if load_parser:
+        app.add_source_suffix(".md", "markdown")
+        app.add_source_parser(MystParser)
+
+    app.add_role("sub-ref", SubstitutionReferenceRole())
+    app.add_directive("figure-md", FigureMarkdown)
+
+    app.add_post_transform(MystReferenceResolver)
+    # expose every parser option not marked ``docutils_only`` as ``myst_<name>``
+    for name, default, field in MdParserConfig().as_triple():
+        if not field.metadata.get("docutils_only", False):
+            # TODO add types?
+            app.add_config_value(f"myst_{name}", default, "env", types=Any)
+    # create_myst_config is connected first; override_mathjax reads the env.myst_config it sets
+    app.connect("builder-inited", create_myst_config)
+    app.connect("builder-inited", override_mathjax)
+
+
+def create_myst_config(app):
+    """Build ``app.env.myst_config`` from the ``myst_*`` sphinx config values."""
+    from sphinx.util import logging
+    # Ignore type checkers because the attribute is dynamically assigned
+    from sphinx.util.console import bold  # type: ignore[attr-defined]
+
+    from myst_parser import __version__
+    from myst_parser.config.main import MdParserConfig
+
+    logger = logging.getLogger(__name__)
+    values = {
+        name: app.config[f"myst_{name}"]
+        for name, _, field in MdParserConfig().as_triple()
+        if not field.metadata.get("docutils_only", False)
+    }
+
+    try:
+        app.env.myst_config = MdParserConfig(**values)
+        logger.info(bold("myst v%s:") + " %s", __version__, app.env.myst_config)
+    except (TypeError, ValueError) as error:
+        # log the exception itself (``args`` may be empty), then fall back to defaults
+        logger.error("myst configuration invalid: %s", error)
+        app.env.myst_config = MdParserConfig()
diff --git a/myst_parser/sphinx_ext/mathjax.py b/myst_parser/sphinx_ext/mathjax.py
new file mode 100644
index 0000000..260f008
--- /dev/null
+++ b/myst_parser/sphinx_ext/mathjax.py
@@ -0,0 +1,118 @@
+"""Overrides to ``sphinx.ext.mathjax``
+
+This fixes two issues:
+
+1. Mathjax should not search for ``$`` delimiters, nor LaTeX amsmath environments,
+   since we already achieve this with the dollarmath and amsmath markdown-it-py plugins
+2. amsmath math blocks should be wrapped in mathjax delimiters (default ``\\[...\\]``),
+   and assigned an equation number
+
+"""
+from docutils import nodes
+from sphinx.application import Sphinx
+from sphinx.ext import mathjax
+from sphinx.locale import _
+from sphinx.util import logging
+from sphinx.util.math import get_node_equation_number
+from sphinx.writers.html import HTMLTranslator
+
+logger = logging.getLogger(__name__)
+
+
+def log_override_warning(app: Sphinx, version: int, current: str, new: str) -> None:
+    """Warn that myst-parser replaces a user-set MathJax class option, unless suppressed."""
+    if logging.is_suppressed_warning("myst", "mathjax", app.config.suppress_warnings):
+        return
+    # name the option the way it is written in the sphinx configuration
+    if version == 3:
+        config_name = "mathjax3_config['options']['processHtmlClass']"
+    else:
+        config_name = "mathjax_config['tex2jax']['processClass']"
+    logger.warning(
+        f"`{config_name}` is being overridden by myst-parser: '{current}' -> '{new}'. "
+        "Set `suppress_warnings=['myst.mathjax']` to ignore this warning, or "
+        "`myst_update_mathjax=False` if this is undesirable."
+    )
+
+
+def override_mathjax(app: Sphinx) -> None:
+    """Override aspects of the mathjax extension.
+
+    MyST-Parser parses dollar and latex math, via markdown-it plugins.
+    Therefore, we tell Mathjax to only render these HTML elements.
+    This is accompanied by setting the `ignoreClass` on the top-level section of each MyST document.
+    """
+    if (
+        "amsmath" in app.config["myst_enable_extensions"]
+        and "mathjax" in app.registry.html_block_math_renderers
+    ):
+        app.registry.html_block_math_renderers["mathjax"] = (
+            html_visit_displaymath,  # type: ignore[assignment]
+            None,
+        )
+    # the class overrides below apply only with dollarmath enabled and update_mathjax set
+    if "dollarmath" not in app.config["myst_enable_extensions"]:
+        return
+    if not app.env.myst_config.update_mathjax:  # type: ignore
+        return
+
+    mjax_classes = app.env.myst_config.mathjax_classes  # type: ignore
+    # set the processed-class option of whichever MathJax config this Sphinx defines
+    if "mathjax3_config" in app.config:
+        # sphinx 4 + mathjax 3
+        app.config.mathjax3_config = app.config.mathjax3_config or {}  # type: ignore
+        app.config.mathjax3_config.setdefault("options", {})
+        if (
+            "processHtmlClass" in app.config.mathjax3_config["options"]
+            and app.config.mathjax3_config["options"]["processHtmlClass"]
+            != mjax_classes
+        ):
+            log_override_warning(
+                app,
+                3,
+                app.config.mathjax3_config["options"]["processHtmlClass"],
+                mjax_classes,
+            )
+        app.config.mathjax3_config["options"]["processHtmlClass"] = mjax_classes
+    elif "mathjax_config" in app.config:
+        # sphinx 3 + mathjax 2
+        app.config.mathjax_config = app.config.mathjax_config or {}  # type: ignore[attr-defined]
+        app.config.mathjax_config.setdefault("tex2jax", {})
+        if (
+            "processClass" in app.config.mathjax_config["tex2jax"]
+            and app.config.mathjax_config["tex2jax"]["processClass"] != mjax_classes
+        ):
+            log_override_warning(
+                app,
+                2,
+                app.config.mathjax_config["tex2jax"]["processClass"],
+                mjax_classes,
+            )
+        app.config.mathjax_config["tex2jax"]["processClass"] = mjax_classes
+
+
+def html_visit_displaymath(self: HTMLTranslator, node: nodes.math_block) -> None:
+    """Override for sphinx.ext.mathjax.html_visit_displaymath to handle amsmath.
+
+    By default, display math is wrapped in a prefix/suffix
+    defined by mathjax_display, and labelled nodes are numbered.
+    However, this is not the case if the math_block is set as 'nowrap', as for amsmath.
+    Therefore, we need to override this behaviour.
+    """
+    if "amsmath" not in node.get("classes", []):
+        # anything else is left to the stock sphinx visitor
+        return mathjax.html_visit_displaymath(self, node)
+
+    self.body.append(
+        self.starttag(node, "div", CLASS="math notranslate nohighlight amsmath")
+    )
+    # numbered equations get an equation-number span carrying a permalink
+    if node["number"]:
+        number = get_node_equation_number(self, node)
+        self.body.append('<span class="eqno">(%s)' % number)
+        self.add_permalink_ref(node, _("Permalink to this equation"))
+        self.body.append("</span>")
+    # wrap the escaped source in the configured display delimiters
+    prefix, suffix = self.builder.config.mathjax_display
+    self.body.extend([prefix, self.encode(node.astext()), suffix, "</div>\n"])
+    raise nodes.SkipNode
diff --git a/myst_parser/sphinx_ext/myst_refs.py b/myst_parser/sphinx_ext/myst_refs.py
new file mode 100644
index 0000000..f364345
--- /dev/null
+++ b/myst_parser/sphinx_ext/myst_refs.py
@@ -0,0 +1,282 @@
+"""A post-transform for overriding the behaviour of sphinx reference resolution.
+
+This is applied to MyST type references only, such as ``[text](target)``,
+and allows for nested syntax
+"""
+import os
+from typing import Any, List, Optional, Tuple, cast
+
+from docutils import nodes
+from docutils.nodes import Element, document
+from sphinx import addnodes, version_info
+from sphinx.addnodes import pending_xref
+from sphinx.domains.std import StandardDomain
+from sphinx.locale import __
+from sphinx.transforms.post_transforms import ReferencesResolver
+from sphinx.util import docname_join, logging
+from sphinx.util.nodes import clean_astext, make_refnode
+
+from myst_parser._compat import findall
+
+try:
+    from sphinx.errors import NoUri
+except ImportError:
+    # sphinx < 2.1
+    from sphinx.environment import NoUri  # type: ignore
+
+logger = logging.getLogger(__name__)
+
+
+class MystReferenceResolver(ReferencesResolver):
+    """Resolves cross-references on doctrees.
+
+    Overrides default sphinx implementation, to allow for nested syntax
+    """
+
+    default_priority = 9  # higher priority than ReferencesResolver (10)
+
+    def run(self, **kwargs: Any) -> None:
+        """Swap each ``myst`` pending_xref for its resolved node, else its content."""
+        self.document: document
+        for node in findall(self.document)(addnodes.pending_xref):
+            if node["reftype"] != "myst":
+                continue
+            contnode = cast(nodes.TextElement, node[0].deepcopy())
+            newnode = None
+
+            target = node["reftarget"]
+            refdoc = node.get("refdoc", self.env.docname)
+            domain = None
+
+            try:
+                newnode = self.resolve_myst_ref(refdoc, node, contnode)
+                if newnode is None:
+                    # no new node found? try the missing-reference event
+                    # but first we change the reftype to 'any'
+                    # this means it is picked up by extensions like intersphinx
+                    node["reftype"] = "any"
+                    try:
+                        newnode = self.app.emit_firstresult(
+                            "missing-reference",
+                            self.env,
+                            node,
+                            contnode,
+                            **(
+                                {"allowed_exceptions": (NoUri,)}
+                                if version_info[0] > 2
+                                else {}
+                            ),
+                        )
+                    finally:
+                        node["reftype"] = "myst"
+                    # still not found? warn if node wishes to be warned about or
+                    # we are in nit-picky mode
+                    if newnode is None:
+                        node["refdomain"] = ""
+                        # TODO ideally we would override the warning message here,
+                        # to show the [ref.myst] for suppressing warning
+                        self.warn_missing_reference(
+                            refdoc, node["reftype"], target, node, domain
+                        )
+            except NoUri:
+                newnode = contnode
+
+            node.replace_self(newnode or contnode)
+
+    def resolve_myst_ref(
+        self, refdoc: str, node: pending_xref, contnode: Element
+    ) -> Optional[Element]:
+        """Resolve reference generated by the "myst" role; ``[text](reference)``.
+
+        Returns the first match (warning if several are found), or None if there is none.
+        This builds on the sphinx ``any`` role to also resolve:
+
+        - Document references with extensions; ``[text](./doc.md)``
+        - Document references with anchors; ``[text](./doc.md#target)``
+        - Nested syntax for explicit text with std:doc and std:ref;
+          ``[**nested**](reference)``
+        """
+        target = node["reftarget"]  # type: str
+        results = []  # type: List[Tuple[str, Element]]
+
+        res_anchor = self._resolve_anchor(node, refdoc)
+        if res_anchor:
+            results.append(("std:doc", res_anchor))
+        else:
+            # if we've already found an anchored doc,
+            # don't search in the std:ref/std:doc (leads to duplication)
+
+            # resolve standard references
+            res = self._resolve_ref_nested(node, refdoc)
+            if res:
+                results.append(("std:ref", res))
+
+            # resolve doc names
+            res = self._resolve_doc_nested(node, refdoc)
+            if res:
+                results.append(("std:doc", res))
+
+        # get allowed domains for referencing
+        ref_domains = self.env.config.myst_ref_domains
+
+        assert self.app.builder
+
+        # next resolve for any other standard reference objects
+        if ref_domains is None or "std" in ref_domains:
+            stddomain = cast(StandardDomain, self.env.get_domain("std"))
+            for objtype in stddomain.object_types:
+                key = (objtype, target)
+                if objtype == "term":
+                    key = (objtype, target.lower())
+                if key in stddomain.objects:
+                    docname, labelid = stddomain.objects[key]
+                    domain_role = "std:" + stddomain.role_for_objtype(objtype)
+                    ref_node = make_refnode(
+                        self.app.builder, refdoc, docname, labelid, contnode
+                    )
+                    results.append((domain_role, ref_node))
+
+        # finally resolve for any other type of allowed reference domain
+        for domain in self.env.domains.values():
+            if domain.name == "std":
+                continue  # we did this one already
+            if ref_domains is not None and domain.name not in ref_domains:
+                continue
+            try:
+                results.extend(
+                    domain.resolve_any_xref(
+                        self.env, refdoc, self.app.builder, target, node, contnode
+                    )
+                )
+            except NotImplementedError:
+                # the domain doesn't yet support the new interface
+                # we have to manually collect possible references (SLOW)
+                if not (getattr(domain, "__module__", "").startswith("sphinx.")):
+                    logger.warning(
+                        f"Domain '{domain.__module__}::{domain.name}' has not "
+                        "implemented a `resolve_any_xref` method [myst.domains]",
+                        type="myst",
+                        subtype="domains",
+                        once=True,
+                    )
+                for role in domain.roles:
+                    res = domain.resolve_xref(
+                        self.env, refdoc, self.app.builder, role, target, node, contnode
+                    )
+                    if res and len(res) and isinstance(res[0], nodes.Element):
+                        results.append((f"{domain.name}:{role}", res))
+
+        # now, see how many matches we got...
+        if not results:
+            return None
+        if len(results) > 1:
+            # values go in as logging args, so ``__`` translates the constant template
+            candidates = " or ".join(
+                f":{name}:`{ref.get('reftitle', ref.astext())}`" for name, ref in results
+            )
+            logger.warning(
+                __(
+                    "more than one target found for 'myst' cross-reference %s: "
+                    "could be %s [myst.ref]"
+                ),
+                target,
+                candidates,
+                location=node,
+                type="myst",
+                subtype="ref",
+            )
+
+        res_role, newnode = results[0]
+        # Override "myst" class with the actual role type to get the styling
+        # approximately correct.
+        res_domain = res_role.split(":")[0]
+        if len(newnode) > 0 and isinstance(newnode[0], nodes.Element):
+            newnode[0]["classes"] = newnode[0].get("classes", []) + [
+                res_domain,
+                res_role.replace(":", "-"),
+            ]
+
+        return newnode
+
+    def _resolve_anchor(
+        self, node: pending_xref, fromdocname: str
+    ) -> Optional[Element]:
+        """Resolve a ``path#anchor`` target via the nested std-label lookup."""
+        if self.env.config.myst_heading_anchors is None:
+            # no target anchors will have been created, so we don't look for them
+            return None
+
+        rel_path, hash_sep, anchor = node["reftarget"].rpartition("#")
+        if not hash_sep:
+            return None
+
+        # the link may be a heading anchor; first work out which doc it points into
+        refdoc = node.get("refdoc", fromdocname)
+        rel_path = os.path.normpath(rel_path)
+        if rel_path != ".":
+            # anchor in a different doc from the node
+            doc_path = os.path.normpath(os.path.join(refdoc, "..", rel_path))
+        else:
+            # anchor in the same doc as the node
+            doc_path = self.env.doc2path(refdoc, base=False)
+        return self._resolve_ref_nested(node, fromdocname, doc_path + "#" + anchor)
+
+    def _resolve_ref_nested(
+        self, node: pending_xref, fromdocname: str, target=None
+    ) -> Optional[Element]:
+        """Resolve a ``std`` label to a reference node, keeping nested inline syntax.
+
+        This mirrors ``sphinx.domains.std._resolve_ref_xref``,
+        but reuses the link's child nodes rather than converting them to raw text.
+        """
+        std = cast(StandardDomain, self.env.get_domain("std"))
+        label = target or node["reftarget"].lower()
+
+        if node["refexplicit"]:
+            # anonymous label lookup; the caption supplied in the link is kept
+            docname, labelid = std.anonlabels.get(label, ("", ""))
+            innernode = nodes.inline(node.astext(), "")
+            innernode.extend(node[0].children)
+        else:
+            # named label lookup; the caption is the section name
+            # stored alongside the label
+            docname, labelid, sectname = std.labels.get(label, ("", "", ""))
+            innernode = nodes.inline(sectname, sectname)
+
+        if not docname:
+            return None
+
+        assert self.app.builder
+        return make_refnode(self.app.builder, fromdocname, docname, labelid, innernode)
+
+    def _resolve_doc_nested(
+        self, node: pending_xref, fromdocname: str
+    ) -> Optional[Element]:
+        """Resolve a document reference, keeping nested inline syntax.
+
+        This mirrors ``sphinx.domains.std._resolve_doc_xref``, but reuses the link's
+        child nodes rather than raw text, and accepts extensions on document names.
+        """
+        # the target names a source document; it can be absolute or relative
+        refdoc = node.get("refdoc", fromdocname)
+        docname = docname_join(refdoc, node["reftarget"])
+
+        if docname not in self.env.all_docs:
+            # retry with a known source suffix stripped from the name
+            stem, ext = os.path.splitext(docname)
+            if ext in self.env.config.source_suffix:
+                docname = stem
+            if docname not in self.env.all_docs:
+                return None
+
+        if node["refexplicit"]:
+            # the link carries its own caption
+            innernode = nodes.inline(node.astext(), "", classes=["doc"])
+            innernode.extend(node[0].children)
+        else:
+            # TODO do we want nested syntax for titles?
+            caption = clean_astext(self.env.titles[docname])
+            innernode = nodes.inline(caption, caption, classes=["doc"])
+
+        assert self.app.builder
+        return make_refnode(self.app.builder, fromdocname, docname, "", innernode)