From 943e3dc057eca53e68ddec51529bd6a1279ebd8e Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 29 Apr 2024 06:23:02 +0200
Subject: Adding upstream version 0.18.1.

Signed-off-by: Daniel Baumann
---
 myst_parser/__init__.py                       |   10 +
 myst_parser/_compat.py                        |   11 +
 myst_parser/_docs.py                          |  198 ++++
 myst_parser/cli.py                            |   42 +
 myst_parser/config/__init__.py                |    1 +
 myst_parser/config/dc_validators.py           |  161 +++
 myst_parser/config/main.py                    |  409 +++++++
 myst_parser/docutils_.py                      |    6 +
 myst_parser/mdit_to_docutils/__init__.py      |    1 +
 myst_parser/mdit_to_docutils/base.py          | 1483 +++++++++++++++++++++++++
 myst_parser/mdit_to_docutils/html_to_nodes.py |  139 +++
 myst_parser/mdit_to_docutils/sphinx_.py       |  245 ++++
 myst_parser/mdit_to_docutils/utils.py         |   36 +
 myst_parser/mocking.py                        |  514 +++++++++
 myst_parser/parsers/__init__.py               |    1 +
 myst_parser/parsers/directives.py             |  190 ++++
 myst_parser/parsers/docutils_.py              |  275 +++++
 myst_parser/parsers/mdit.py                   |  123 ++
 myst_parser/parsers/parse_html.py             |  440 ++++++++
 myst_parser/parsers/sphinx_.py                |   69 ++
 myst_parser/py.typed                          |    1 +
 myst_parser/sphinx_.py                        |    6 +
 myst_parser/sphinx_ext/__init__.py            |    1 +
 myst_parser/sphinx_ext/directives.py          |  136 +++
 myst_parser/sphinx_ext/main.py                |   60 +
 myst_parser/sphinx_ext/mathjax.py             |  118 ++
 myst_parser/sphinx_ext/myst_refs.py           |  282 +++++
 27 files changed, 4958 insertions(+)
 create mode 100644 myst_parser/__init__.py
 create mode 100644 myst_parser/_compat.py
 create mode 100644 myst_parser/_docs.py
 create mode 100644 myst_parser/cli.py
 create mode 100644 myst_parser/config/__init__.py
 create mode 100644 myst_parser/config/dc_validators.py
 create mode 100644 myst_parser/config/main.py
 create mode 100644 myst_parser/docutils_.py
 create mode 100644 myst_parser/mdit_to_docutils/__init__.py
 create mode 100644 myst_parser/mdit_to_docutils/base.py
 create mode 100644 myst_parser/mdit_to_docutils/html_to_nodes.py
 create mode 100644 myst_parser/mdit_to_docutils/sphinx_.py
 create mode 100644 myst_parser/mdit_to_docutils/utils.py
 create mode 100644 myst_parser/mocking.py
 create mode 100644 myst_parser/parsers/__init__.py
 create mode 100644 myst_parser/parsers/directives.py
 create mode 100644 myst_parser/parsers/docutils_.py
 create mode 100644 myst_parser/parsers/mdit.py
 create mode 100644 myst_parser/parsers/parse_html.py
 create mode 100644 myst_parser/parsers/sphinx_.py
 create mode 100644 myst_parser/py.typed
 create mode 100644 myst_parser/sphinx_.py
 create mode 100644 myst_parser/sphinx_ext/__init__.py
 create mode 100644 myst_parser/sphinx_ext/directives.py
 create mode 100644 myst_parser/sphinx_ext/main.py
 create mode 100644 myst_parser/sphinx_ext/mathjax.py
 create mode 100644 myst_parser/sphinx_ext/myst_refs.py

diff --git a/myst_parser/__init__.py b/myst_parser/__init__.py
new file mode 100644
index 0000000..56dd460
--- /dev/null
+++ b/myst_parser/__init__.py
@@ -0,0 +1,10 @@
+"""An extended commonmark compliant parser, with bridges to docutils & sphinx."""
+__version__ = "0.18.1"
+
+
+def setup(app):
+    """Initialize the Sphinx extension."""
+    from myst_parser.sphinx_ext.main import setup_sphinx
+
+    setup_sphinx(app, load_parser=True)
+    return {"version": __version__, "parallel_read_safe": True}
diff --git a/myst_parser/_compat.py b/myst_parser/_compat.py
new file mode 100644
index 0000000..d29cf4d
--- /dev/null
+++ b/myst_parser/_compat.py
@@ -0,0 +1,11 @@
+"""Helpers for cross compatibility across dependency versions."""
+from typing import Callable, Iterable
+
+from docutils.nodes import Element
+
+
+def findall(node: Element) -> Callable[..., Iterable[Element]]:
+    """Return the iterator for a node's descendants."""
+    # findall replaces traverse in docutils v0.18
+    # note a difference is that findall is an iterator
+    return getattr(node, "findall", node.traverse)
diff --git a/myst_parser/_docs.py b/myst_parser/_docs.py
new file mode 100644
index 0000000..a7c46a3
--- /dev/null
+++ b/myst_parser/_docs.py
@@ -0,0 +1,198 @@
+"""Code to use internally, for documentation."""
+from __future__ import annotations
+
+import io
+from typing import Sequence, Union
+
+from docutils import nodes
+from docutils.frontend import OptionParser
+from docutils.parsers.rst import directives
+from sphinx.directives import other
+from sphinx.util import logging
+from sphinx.util.docutils import SphinxDirective
+from typing_extensions import get_args, get_origin
+
+from .config.main import MdParserConfig
+from .parsers.docutils_ import Parser as DocutilsParser
+
+logger = logging.getLogger(__name__)
+
+
+class _ConfigBase(SphinxDirective):
+    """Directive to automate rendering of the configuration."""
+
+    @staticmethod
+    def table_header():
+        return [
+            "```````{list-table}",
+            ":header-rows: 1",
+            ":widths: 15 10 20",
+            "",
+            "* - Name",
+            "  - Type",
+            "  - Description",
+        ]
+
+    @staticmethod
+    def field_default(value):
+        default = " ".join(f"{value!r}".splitlines())
+        return default
+
+    @staticmethod
+    def field_type(field):
+        ftypes: Sequence[str]
+        if get_origin(field.type) is Union:
+            ftypes = get_args(field.type)
+        else:
+            ftypes = [field.type]
+        ctype = " | ".join(
+            str("None" if ftype == type(None) else ftype)  # type: ignore # noqa: E721
+            for ftype in ftypes
+        )
+        ctype = " ".join(ctype.splitlines())
+        ctype = ctype.replace("typing.", "")
+        ctype = ctype.replace("typing_extensions.", "")
+        for tname in ("str", "int", "float", "bool"):
+            ctype = ctype.replace(f"<class '{tname}'>", tname)
+        return ctype
+
+
+class MystConfigDirective(_ConfigBase):
+
+    option_spec = {
+        "sphinx": directives.flag,
+        "extensions": directives.flag,
+        "scope": lambda x: directives.choice(x, ["global", "local"]),
+    }
+
+    def run(self):
+        """Run the directive."""
+        config = MdParserConfig()
+        text = self.table_header()
+        count = 0
+        for name, value, field in config.as_triple():
+
+            # filter by sphinx options
+            if "sphinx" in self.options and field.metadata.get("sphinx_exclude"):
+                continue
+
+            if "extensions" in self.options:
+                if not field.metadata.get("extension"):
+                    continue
+            else:
+                if field.metadata.get("extension"):
+                    continue
+
+            if self.options.get("scope") == "local":
+                if field.metadata.get("global_only"):
+                    continue
+
+            if self.options.get("scope") == "global":
+                name = f"myst_{name}"
+
+            description = " ".join(field.metadata.get("help", "").splitlines())
+            if field.metadata.get("extension"):
+                description = f"{field.metadata.get('extension')}: {description}"
+            default = self.field_default(value)
+            ctype = self.field_type(field)
+            text.extend(
+                [
+                    f"* - `{name}`",
+                    f"  - `{ctype}`",
+                    f"  - {description} (default: `{default}`)",
+                ]
+            )
+
+            count += 1
+
+        if not count:
+            return []
+
+        text.append("```````")
+        node = nodes.Element()
+        self.state.nested_parse(text, 0, node)
+        return node.children
+
+
+class DocutilsCliHelpDirective(SphinxDirective):
+    """Directive to print the docutils CLI help."""
+
+    has_content = False
+    required_arguments = 0
+    optional_arguments = 0
+    final_argument_whitespace = False
+
+    def run(self):
+        """Run the directive."""
+        stream = io.StringIO()
+        OptionParser(
+            components=(DocutilsParser,),
+            usage="myst-docutils-<writer> [options] [<source> [<destination>]]",
+        ).print_help(stream)
+        return [nodes.literal_block("", stream.getvalue())]
+
+
+class DirectiveDoc(SphinxDirective):
+    """Load and document a directive."""
+
+    required_arguments = 1  # name of the directive
+    has_content = True
+
+    def run(self):
+        """Run the directive."""
+        name = self.arguments[0]
+        # load the directive class
+        klass, _ = directives.directive(
+            name, self.state.memo.language, self.state.document
+        )
+        if klass is None:
+            logger.warning(f"Directive {name} not found.", line=self.lineno)
+            return []
+        content = " ".join(self.content)
+        text = f"""\
+:Name: `{name}`
+:Description: {content}
+:Arguments: {klass.required_arguments} required, {klass.optional_arguments} optional
+:Content: {'yes' if klass.has_content else 'no'}
+:Options:
+"""
+        if klass.option_spec:
+            text += " name | type\n -----|------\n"
+            for key, func in klass.option_spec.items():
+                text += f" {key} | {convert_opt(name, func)}\n"
+        node = nodes.Element()
+        self.state.nested_parse(text.splitlines(), 0, node)
+        return node.children
+
+
+def convert_opt(name, func):
+    """Convert an option function to a string."""
+    if func is directives.flag:
+        return "flag"
+    if func is directives.unchanged:
+        return "text"
+    if func is directives.unchanged_required:
+        return "text"
+    if func is directives.class_option:
+        return "space-delimited list"
+    if func is directives.uri:
+        return "URI"
+    if func is directives.path:
+        return "path"
+    if func is int:
+        return "integer"
+    if func is directives.positive_int:
+        return "integer (positive)"
+    if func is directives.nonnegative_int:
+        return "integer (non-negative)"
+    if func is directives.positive_int_list:
+        return "space/comma-delimited list of integers (positive)"
+    if func is directives.percentage:
+        return "percentage"
+    if func is directives.length_or_unitless:
+        return "length or unitless"
+    if func is directives.length_or_percentage_or_unitless:
+        return "length, percentage or unitless"
+    if func is other.int_or_nothing:
+        return "integer"
+    return ""
diff --git a/myst_parser/cli.py b/myst_parser/cli.py
new file mode 100644
index 0000000..b9bb1ba
--- /dev/null
+++ b/myst_parser/cli.py
@@ -0,0 +1,42 @@
+import argparse
+import sys
+
+from markdown_it.renderer import RendererHTML
+
+from myst_parser.config.main import MdParserConfig
+from myst_parser.parsers.mdit import create_md_parser
+
+
+def print_anchors(args=None):
+    """Print the headings (with their generated anchors) of a Markdown file."""
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "input",
+        nargs="?",
+        type=argparse.FileType("r", encoding="utf8"),
+        default=sys.stdin,
+        help="Input file (default stdin)",
+    )
+    arg_parser.add_argument(
+        "-o",
+        "--output",
+        type=argparse.FileType("w", encoding="utf8"),
+        default=sys.stdout,
+        help="Output file (default stdout)",
+    )
+    arg_parser.add_argument(
+        "-l", "--level", type=int, default=2, help="Maximum heading level."
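+        # Illustrative invocation, assuming the ``myst-anchors`` console-script
+        # entry point that this package registers for ``print_anchors``:
+        #
+        #     myst-anchors -l 2 docs/index.md
+        #
+        # prints the rendered <h1>/<h2> headings, each with its anchor slug.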
+    )
+    args = arg_parser.parse_args(args)
+    parser = create_md_parser(MdParserConfig(heading_anchors=args.level), RendererHTML)
+
+    def _filter_plugin(state):
+        state.tokens = [
+            t
+            for t in state.tokens
+            if t.type.startswith("heading_") and int(t.tag[1]) <= args.level
+        ]
+
+    parser.use(lambda p: p.core.ruler.push("filter", _filter_plugin))
+    text = parser.render(args.input.read())
+    args.output.write(text)
diff --git a/myst_parser/config/__init__.py b/myst_parser/config/__init__.py
new file mode 100644
index 0000000..898f9ce
--- /dev/null
+++ b/myst_parser/config/__init__.py
@@ -0,0 +1 @@
+"""This module holds the global configuration for the parser ``MdParserConfig``."""
diff --git a/myst_parser/config/dc_validators.py b/myst_parser/config/dc_validators.py
new file mode 100644
index 0000000..765cfb9
--- /dev/null
+++ b/myst_parser/config/dc_validators.py
@@ -0,0 +1,161 @@
+"""Validators for dataclasses, mirroring those of https://github.com/python-attrs/attrs."""
+from __future__ import annotations
+
+import dataclasses as dc
+from typing import Any, Sequence
+
+from typing_extensions import Protocol
+
+
+def validate_field(inst: Any, field: dc.Field, value: Any) -> None:
+    """Validate the field of a dataclass,
+    according to a `validator` function set in the field.metadata.
+
+    The validator function should take as input (inst, field, value) and
+    raise an exception if the value is invalid.
+    """
+    if "validator" not in field.metadata:
+        return
+    if isinstance(field.metadata["validator"], list):
+        for validator in field.metadata["validator"]:
+            validator(inst, field, value)
+    else:
+        field.metadata["validator"](inst, field, value)
+
+
+def validate_fields(inst: Any) -> None:
+    """Validate the fields of a dataclass,
+    according to `validator` functions set in the field metadata.
+
+    This function should be called in the `__post_init__` of the dataclass.
+
+    The validator function should take as input (inst, field, value) and
+    raise an exception if the value is invalid.
+    """
+    for field in dc.fields(inst):
+        validate_field(inst, field, getattr(inst, field.name))
+
+
+class ValidatorType(Protocol):
+    def __call__(
+        self, inst: Any, field: dc.Field, value: Any, suffix: str = ""
+    ) -> None:
+        ...
+
+
+def instance_of(type: type[Any] | tuple[type[Any], ...]) -> ValidatorType:
+    """
+    A validator that raises a `TypeError` if the initializer is called
+    with a wrong type for this particular attribute (checks are performed using
+    `isinstance` therefore it's also valid to pass a tuple of types).
+
+    :param type: The type to check for.
+    """
+
+    def _validator(inst, field, value, suffix=""):
+        if not isinstance(value, type):
+            raise TypeError(
+                f"'{field.name}{suffix}' must be of type {type!r} "
+                f"(got {value!r} that is a {value.__class__!r})."
+            )
+
+    return _validator
+
+
+def optional(validator: ValidatorType) -> ValidatorType:
+    """
+    A validator that makes an attribute optional. An optional attribute is one
+    which can be set to ``None`` in addition to satisfying the requirements of
+    the sub-validator.
+    """
+
+    def _validator(inst, field, value, suffix=""):
+        if value is None:
+            return
+
+        validator(inst, field, value, suffix=suffix)
+
+    return _validator
+
+
+def is_callable(inst, field, value, suffix=""):
+    """
+    A validator that raises a `TypeError` if the
+    initializer is called with a value for this particular attribute
+    that is not callable.
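+
+    A minimal sketch of how such a validator is wired up (a hypothetical
+    dataclass, for illustration only)::
+
+        @dc.dataclass
+        class _Example:
+            func: Any = dc.field(default=len, metadata={"validator": is_callable})
+
+            def __post_init__(self):
+                validate_fields(self)  # raises TypeError if func is not callable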
+ """ + if not callable(value): + raise TypeError( + f"'{field.name}{suffix}' must be callable " + f"(got {value!r} that is a {value.__class__!r})." + ) + + +def in_(options: Sequence) -> ValidatorType: + """ + A validator that raises a `ValueError` if the initializer is called + with a value that does not belong in the options provided. The check is + performed using ``value in options``. + + :param options: Allowed options. + """ + + def _validator(inst, field, value, suffix=""): + try: + in_options = value in options + except TypeError: # e.g. `1 in "abc"` + in_options = False + + if not in_options: + raise ValueError( + f"'{field.name}{suffix}' must be in {options!r} (got {value!r})" + ) + + return _validator + + +def deep_iterable( + member_validator: ValidatorType, iterable_validator: ValidatorType | None = None +) -> ValidatorType: + """ + A validator that performs deep validation of an iterable. + + :param member_validator: Validator to apply to iterable members + :param iterable_validator: Validator to apply to iterable itself + """ + + def _validator(inst, field, value, suffix=""): + if iterable_validator is not None: + iterable_validator(inst, field, value, suffix=suffix) + + for idx, member in enumerate(value): + member_validator(inst, field, member, suffix=f"{suffix}[{idx}]") + + return _validator + + +def deep_mapping( + key_validator: ValidatorType, + value_validator: ValidatorType, + mapping_validator: ValidatorType | None = None, +) -> ValidatorType: + """ + A validator that performs deep validation of a dictionary. + + :param key_validator: Validator to apply to dictionary keys + :param value_validator: Validator to apply to dictionary values + :param mapping_validator: Validator to apply to top-level mapping attribute (optional) + """ + + def _validator(inst, field: dc.Field, value, suffix=""): + if mapping_validator is not None: + mapping_validator(inst, field, value) + + for key in value: + key_validator(inst, field, key, suffix=f"{suffix}[{key!r}]") + value_validator(inst, field, value[key], suffix=f"{suffix}[{key!r}]") + + return _validator diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py new file mode 100644 index 0000000..a134ea7 --- /dev/null +++ b/myst_parser/config/main.py @@ -0,0 +1,409 @@ +"""The configuration for the myst parser.""" +import dataclasses as dc +from typing import ( + Any, + Callable, + Dict, + Iterable, + Iterator, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from .dc_validators import ( + deep_iterable, + deep_mapping, + in_, + instance_of, + is_callable, + optional, + validate_field, + validate_fields, +) + + +def check_extensions(_, __, value): + if not isinstance(value, Iterable): + raise TypeError(f"'enable_extensions' not iterable: {value}") + diff = set(value).difference( + [ + "amsmath", + "attrs_image", + "colon_fence", + "deflist", + "dollarmath", + "fieldlist", + "html_admonition", + "html_image", + "linkify", + "replacements", + "smartquotes", + "strikethrough", + "substitution", + "tasklist", + ] + ) + if diff: + raise ValueError(f"'enable_extensions' items not recognised: {diff}") + + +def check_sub_delimiters(_, __, value): + if (not isinstance(value, (tuple, list))) or len(value) != 2: + raise TypeError(f"myst_sub_delimiters is not a tuple of length 2: {value}") + for delim in value: + if (not isinstance(delim, str)) or len(delim) != 1: + raise TypeError( + f"myst_sub_delimiters does not contain strings of length 1: {value}" + ) + + +@dc.dataclass() +class MdParserConfig: + """Configuration options 
for the Markdown Parser. + + Note in the sphinx configuration these option names are prepended with ``myst_`` + """ + + # TODO replace commonmark_only, gfm_only with a single option + + commonmark_only: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Use strict CommonMark parser", + }, + ) + gfm_only: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Use strict Github Flavoured Markdown parser", + }, + ) + + enable_extensions: Sequence[str] = dc.field( + default_factory=list, + metadata={"validator": check_extensions, "help": "Enable syntax extensions"}, + ) + + disable_syntax: Iterable[str] = dc.field( + default_factory=list, + metadata={ + "validator": deep_iterable(instance_of(str), instance_of((list, tuple))), + "help": "Disable Commonmark syntax elements", + }, + ) + + all_links_external: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Parse all links as simple hyperlinks", + }, + ) + + # see https://en.wikipedia.org/wiki/List_of_URI_schemes + url_schemes: Optional[Iterable[str]] = dc.field( + default=cast(Optional[Iterable[str]], ("http", "https", "mailto", "ftp")), + metadata={ + "validator": optional( + deep_iterable(instance_of(str), instance_of((list, tuple))) + ), + "help": "URL scheme prefixes identified as external links", + }, + ) + + ref_domains: Optional[Iterable[str]] = dc.field( + default=None, + metadata={ + "validator": optional( + deep_iterable(instance_of(str), instance_of((list, tuple))) + ), + "help": "Sphinx domain names to search in for link references", + }, + ) + + highlight_code_blocks: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Syntax highlight code blocks with pygments", + "docutils_only": True, + }, + ) + + number_code_blocks: Sequence[str] = dc.field( + default_factory=list, + metadata={ + "validator": deep_iterable(instance_of(str), instance_of((list, tuple))), + "help": "Add line numbers to code blocks with these languages", + }, + ) + + title_to_header: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Convert a `title` field in the top-matter to a H1 header", + }, + ) + + heading_anchors: Optional[int] = dc.field( + default=None, + metadata={ + "validator": optional(in_([1, 2, 3, 4, 5, 6, 7])), + "help": "Heading level depth to assign HTML anchors", + }, + ) + + heading_slug_func: Optional[Callable[[str], str]] = dc.field( + default=None, + metadata={ + "validator": optional(is_callable), + "help": "Function for creating heading anchors", + "global_only": True, + }, + ) + + html_meta: Dict[str, str] = dc.field( + default_factory=dict, + repr=False, + metadata={ + "validator": deep_mapping( + instance_of(str), instance_of(str), instance_of(dict) + ), + "merge_topmatter": True, + "help": "HTML meta tags", + }, + ) + + footnote_transition: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Place a transition before any footnotes", + }, + ) + + words_per_minute: int = dc.field( + default=200, + metadata={ + "validator": instance_of(int), + "help": "For reading speed calculations", + }, + ) + + # Extension specific + + substitutions: Dict[str, Union[str, int, float]] = dc.field( + default_factory=dict, + repr=False, + metadata={ + "validator": deep_mapping( + instance_of(str), instance_of((str, int, float)), instance_of(dict) + ), + "merge_topmatter": True, + "help": "Substitutions mapping", + 
"extension": "substitutions", + }, + ) + + sub_delimiters: Tuple[str, str] = dc.field( + default=("{", "}"), + metadata={ + "validator": check_sub_delimiters, + "help": "Substitution delimiters", + "extension": "substitutions", + }, + ) + + linkify_fuzzy_links: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Recognise URLs without schema prefixes", + "extension": "linkify", + }, + ) + + dmath_allow_labels: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Parse `$$...$$ (label)`", + "extension": "dollarmath", + }, + ) + dmath_allow_space: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Allow initial/final spaces in `$ ... $`", + "extension": "dollarmath", + }, + ) + dmath_allow_digits: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Allow initial/final digits `1$ ...$2`", + "extension": "dollarmath", + }, + ) + dmath_double_inline: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Parse inline `$$ ... $$`", + "extension": "dollarmath", + }, + ) + + update_mathjax: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Update sphinx.ext.mathjax configuration to ignore `$` delimiters", + "extension": "dollarmath", + "global_only": True, + }, + ) + + mathjax_classes: str = dc.field( + default="tex2jax_process|mathjax_process|math|output_area", + metadata={ + "validator": instance_of(str), + "help": "MathJax classes to add to math HTML", + "extension": "dollarmath", + "global_only": True, + }, + ) + + def __post_init__(self): + validate_fields(self) + + def copy(self, **kwargs: Any) -> "MdParserConfig": + """Return a new object replacing specified fields with new values. + + Note: initiating the copy will also validate the new fields. + """ + return dc.replace(self, **kwargs) + + @classmethod + def get_fields(cls) -> Tuple[dc.Field, ...]: + """Return all attribute fields in this class.""" + return dc.fields(cls) + + def as_dict(self, dict_factory=dict) -> dict: + """Return a dictionary of field name -> value.""" + return dc.asdict(self, dict_factory=dict_factory) + + def as_triple(self) -> Iterable[Tuple[str, Any, dc.Field]]: + """Yield triples of (name, value, field).""" + fields = {f.name: f for f in dc.fields(self.__class__)} + for name, value in dc.asdict(self).items(): + yield name, value, fields[name] + + +def merge_file_level( + config: MdParserConfig, + topmatter: Dict[str, Any], + warning: Callable[[str, str], None], +) -> MdParserConfig: + """Merge the file-level topmatter with the global config. + + :param config: Global config. + :param topmatter: Topmatter from the file. + :param warning: Function to call with a warning (type, message). 
+ :returns: A new config object + """ + # get updates + updates: Dict[str, Any] = {} + myst = topmatter.get("myst", {}) + if not isinstance(myst, dict): + warning("topmatter", f"'myst' key not a dict: {type(myst)}") + else: + updates = myst + + # allow html_meta and substitutions at top-level for back-compatibility + if "html_meta" in topmatter: + warning( + "topmatter", + "top-level 'html_meta' key is deprecated, " + "place under 'myst' key instead", + ) + updates["html_meta"] = topmatter["html_meta"] + if "substitutions" in topmatter: + warning( + "topmatter", + "top-level 'substitutions' key is deprecated, " + "place under 'myst' key instead", + ) + updates["substitutions"] = topmatter["substitutions"] + + new = config.copy() + + # validate each update + fields = {name: (value, field) for name, value, field in config.as_triple()} + for name, value in updates.items(): + + if name not in fields: + warning("topmatter", f"Unknown field: {name}") + continue + + old_value, field = fields[name] + + try: + validate_field(new, field, value) + except Exception as exc: + warning("topmatter", str(exc)) + continue + + if field.metadata.get("merge_topmatter"): + value = {**old_value, **value} + + setattr(new, name, value) + + return new + + +class TopmatterReadError(Exception): + """Topmatter parsing error.""" + + +def read_topmatter(text: Union[str, Iterator[str]]) -> Optional[Dict[str, Any]]: + """Read the (optional) YAML topmatter from a source string. + + This is identified by the first line starting with `---`, + then read up to a terminating line of `---`, or `...`. + + :param source: The source string to read from + :return: The topmatter + """ + import yaml + + if isinstance(text, str): + if not text.startswith("---"): # skip creating the line list in memory + return None + text = (line for line in text.splitlines()) + try: + if not next(text).startswith("---"): + return None + except StopIteration: + return None + top_matter = [] + for line in text: + if line.startswith("---") or line.startswith("..."): + break + top_matter.append(line.rstrip() + "\n") + try: + metadata = yaml.safe_load("".join(top_matter)) + assert isinstance(metadata, dict) + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as err: + raise TopmatterReadError("Malformed YAML") from err + if not isinstance(metadata, dict): + raise TopmatterReadError(f"YAML is not a dict: {type(metadata)}") + return metadata diff --git a/myst_parser/docutils_.py b/myst_parser/docutils_.py new file mode 100644 index 0000000..6f2cc84 --- /dev/null +++ b/myst_parser/docutils_.py @@ -0,0 +1,6 @@ +"""A module for compatibility with the docutils>=0.17 `include` directive, in RST documents:: + + .. 
include:: path/to/file.md + :parser: myst_parser.docutils_ +""" +from myst_parser.parsers.docutils_ import Parser # noqa: F401 diff --git a/myst_parser/mdit_to_docutils/__init__.py b/myst_parser/mdit_to_docutils/__init__.py new file mode 100644 index 0000000..0b9307f --- /dev/null +++ b/myst_parser/mdit_to_docutils/__init__.py @@ -0,0 +1 @@ +"""Conversion of Markdown-it tokens to docutils AST.""" diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py new file mode 100644 index 0000000..cedd6c3 --- /dev/null +++ b/myst_parser/mdit_to_docutils/base.py @@ -0,0 +1,1483 @@ +"""Convert Markdown-it tokens to docutils nodes.""" +from __future__ import annotations + +import inspect +import json +import os +import re +from collections import OrderedDict +from contextlib import contextmanager +from datetime import date, datetime +from types import ModuleType +from typing import TYPE_CHECKING, Any, Iterator, MutableMapping, Sequence, cast +from urllib.parse import urlparse + +import jinja2 +import yaml +from docutils import nodes +from docutils.frontend import OptionParser +from docutils.languages import get_language +from docutils.parsers.rst import Directive, DirectiveError +from docutils.parsers.rst import Parser as RSTParser +from docutils.parsers.rst import directives, roles +from docutils.parsers.rst.directives.misc import Include +from docutils.parsers.rst.languages import get_language as get_language_rst +from docutils.statemachine import StringList +from docutils.transforms.components import Filter +from docutils.utils import Reporter, new_document +from docutils.utils.code_analyzer import Lexer, LexerError, NumberLines +from markdown_it import MarkdownIt +from markdown_it.common.utils import escapeHtml +from markdown_it.renderer import RendererProtocol +from markdown_it.token import Token +from markdown_it.tree import SyntaxTreeNode + +from myst_parser._compat import findall +from myst_parser.config.main import MdParserConfig +from myst_parser.mocking import ( + MockIncludeDirective, + MockingError, + MockInliner, + MockRSTParser, + MockState, + MockStateMachine, +) +from myst_parser.parsers.directives import DirectiveParsingError, parse_directive_text +from .html_to_nodes import html_to_nodes +from .utils import is_external_url + +if TYPE_CHECKING: + from sphinx.environment import BuildEnvironment + + +def make_document(source_path="notset", parser_cls=RSTParser) -> nodes.document: + """Create a new docutils document, with the parser classes' default settings.""" + settings = OptionParser(components=(parser_cls,)).get_default_values() + return new_document(source_path, settings=settings) + + +REGEX_DIRECTIVE_START = re.compile(r"^[\s]{0,3}([`]{3,10}|[~]{3,10}|[:]{3,10})\{") + + +def token_line(token: SyntaxTreeNode, default: int | None = None) -> int: + """Retrieve the initial line of a token.""" + if not getattr(token, "map", None): + if default is not None: + return default + raise ValueError(f"token map not set: {token}") + return token.map[0] # type: ignore[index] + + +def create_warning( + document: nodes.document, + message: str, + *, + line: int | None = None, + append_to: nodes.Element | None = None, + wtype: str = "myst", + subtype: str = "other", +) -> nodes.system_message | None: + """Generate a warning, logging if it is necessary. + + Note this is overridden in the ``SphinxRenderer``, + to handle suppressed warning types. 
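+
+    A typical call, with illustrative values::
+
+        create_warning(document, "duplicate key", line=3, subtype="topmatter")
+
+    reports ``duplicate key [myst.topmatter]`` via ``document.reporter`` and
+    returns the resulting ``system_message`` node (the ``SphinxRenderer``
+    override may instead suppress the warning and return ``None``).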
+ """ + kwargs = {"line": line} if line is not None else {} + msg_node = document.reporter.warning(f"{message} [{wtype}.{subtype}]", **kwargs) + if append_to is not None: + append_to.append(msg_node) + return msg_node + + +class DocutilsRenderer(RendererProtocol): + """A markdown-it-py renderer to populate (in-place) a `docutils.document` AST. + + Note, this render is not dependent on Sphinx. + """ + + __output__ = "docutils" + + def __init__(self, parser: MarkdownIt) -> None: + """Load the renderer (called by ``MarkdownIt``)""" + self.md = parser + self.rules = { + k: v + for k, v in inspect.getmembers(self, predicate=inspect.ismethod) + if k.startswith("render_") and k != "render_children" + } + + def __getattr__(self, name: str): + """Warn when the renderer has not been setup yet.""" + if name in ( + "md_env", + "md_config", + "md_options", + "document", + "current_node", + "reporter", + "language_module_rst", + "_level_to_elem", + ): + raise AttributeError( + f"'{name}' attribute is not available until setup_render() is called" + ) + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{name}'" + ) + + def setup_render( + self, options: dict[str, Any], env: MutableMapping[str, Any] + ) -> None: + """Setup the renderer with per render variables.""" + self.md_env = env + self.md_options = options + self.md_config: MdParserConfig = options["myst_config"] + self.document: nodes.document = options.get("document", make_document()) + self.current_node: nodes.Element = options.get("current_node", self.document) + self.reporter: Reporter = self.document.reporter + # note there are actually two possible language modules: + # one from docutils.languages, and one from docutils.parsers.rst.languages + self.language_module_rst: ModuleType = get_language_rst( + self.document.settings.language_code + ) + # a mapping of heading levels to its currently associated node + self._level_to_elem: dict[int, nodes.document | nodes.section] = { + 0: self.document + } + + @property + def sphinx_env(self) -> BuildEnvironment | None: + """Return the sphinx env, if using Sphinx.""" + try: + return self.document.settings.env + except AttributeError: + return None + + def create_warning( + self, + message: str, + *, + line: int | None = None, + append_to: nodes.Element | None = None, + wtype: str = "myst", + subtype: str = "other", + ) -> nodes.system_message | None: + """Generate a warning, logging if it is necessary. + + Note this is overridden in the ``SphinxRenderer``, + to handle suppressed warning types. + """ + return create_warning( + self.document, + message, + line=line, + append_to=append_to, + wtype=wtype, + subtype=subtype, + ) + + def _render_tokens(self, tokens: list[Token]) -> None: + """Render the tokens.""" + # propagate line number down to inline elements + for token in tokens: + if not token.map: + continue + # For docutils we want 1 based line numbers (not 0) + token.map = [token.map[0] + 1, token.map[1] + 1] + for token_child in token.children or []: + token_child.map = token.map + + # nest tokens + node_tree = SyntaxTreeNode(tokens) + + # move footnote definitions to env + self.md_env.setdefault("foot_refs", {}) + for node in node_tree.walk(include_self=True): + new_children = [] + for child in node.children: + if child.type == "footnote_reference": + label = child.meta["label"] + self.md_env["foot_refs"].setdefault(label, []).append(child) + else: + new_children.append(child) + + node.children = new_children + + # render + for child in node_tree.children: + # skip hidden? 
+ if f"render_{child.type}" in self.rules: + self.rules[f"render_{child.type}"](child) + else: + self.create_warning( + f"No render method for: {child.type}", + line=token_line(child, default=0), + subtype="render", + append_to=self.current_node, + ) + + def render( + self, tokens: Sequence[Token], options, md_env: MutableMapping[str, Any] + ) -> nodes.document: + """Run the render on a token stream. + + :param tokens: list on block tokens to render + :param options: params of parser instance + :param md_env: the markdown-it environment sandbox associated with the tokens, + containing additional metadata like reference info + """ + self.setup_render(options, md_env) + self._render_initialise() + self._render_tokens(list(tokens)) + self._render_finalise() + return self.document + + def _render_initialise(self) -> None: + """Initialise the render of the document.""" + self.current_node.extend( + html_meta_to_nodes( + self.md_config.html_meta, + document=self.document, + line=0, + reporter=self.reporter, + ) + ) + + def _render_finalise(self) -> None: + """Finalise the render of the document.""" + + # log warnings for duplicate reference definitions + # "duplicate_refs": [{"href": "ijk", "label": "B", "map": [4, 5], "title": ""}], + for dup_ref in self.md_env.get("duplicate_refs", []): + self.create_warning( + f"Duplicate reference definition: {dup_ref['label']}", + line=dup_ref["map"][0] + 1, + subtype="ref", + append_to=self.document, + ) + + # we don't use the foot_references stored in the env + # since references within directives/roles will have been added after + # those from the initial markdown parse + # instead we gather them from a walk of the created document + foot_refs = OrderedDict() + for refnode in findall(self.document)(nodes.footnote_reference): + if refnode["refname"] not in foot_refs: + foot_refs[refnode["refname"]] = True + + if foot_refs and self.md_config.footnote_transition: + self.current_node.append(nodes.transition(classes=["footnotes"])) + for footref in foot_refs: + foot_ref_tokens = self.md_env["foot_refs"].get(footref, []) + if len(foot_ref_tokens) > 1: + self.create_warning( + f"Multiple footnote definitions found for label: '{footref}'", + subtype="footnote", + append_to=self.current_node, + ) + + if len(foot_ref_tokens) < 1: + self.create_warning( + f"No footnote definitions found for label: '{footref}'", + subtype="footnote", + append_to=self.current_node, + ) + else: + self.render_footnote_reference(foot_ref_tokens[0]) + + # Add the wordcount, generated by the ``mdit_py_plugins.wordcount_plugin``. + wordcount_metadata = self.md_env.get("wordcount", {}) + if wordcount_metadata: + + # save the wordcount to the sphinx BuildEnvironment metadata + if self.sphinx_env is not None: + meta = self.sphinx_env.metadata.setdefault(self.sphinx_env.docname, {}) + meta["wordcount"] = wordcount_metadata + + # now add the wordcount as substitution definitions, + # so we can reference them in the document + for key in ("words", "minutes"): + value = wordcount_metadata.get(key, None) + if value is None: + continue + substitution_node = nodes.substitution_definition( + str(value), nodes.Text(str(value)) + ) + substitution_node.source = self.document["source"] + substitution_node["names"].append(f"wordcount-{key}") + self.document.note_substitution_def( + substitution_node, f"wordcount-{key}" + ) + + def nested_render_text( + self, text: str, lineno: int, inline: bool = False, allow_headings: bool = True + ) -> None: + """Render unparsed text (appending to the current node). 
+ + :param text: the text to render + :param lineno: the starting line number of the text, within the full source + :param inline: whether the text is inline or block + :param allow_headings: whether to allow headings in the text + """ + if inline: + tokens = self.md.parseInline(text, self.md_env) + else: + tokens = self.md.parse(text + "\n", self.md_env) + + # remove front matter, if present, e.g. from included documents + if tokens and tokens[0].type == "front_matter": + tokens.pop(0) + + # update the line numbers + for token in tokens: + if token.map: + token.map = [token.map[0] + lineno, token.map[1] + lineno] + + current_match_titles = self.md_env.get("match_titles", None) + try: + self.md_env["match_titles"] = allow_headings + self._render_tokens(tokens) + finally: + self.md_env["match_titles"] = current_match_titles + + @contextmanager + def current_node_context( + self, node: nodes.Element, append: bool = False + ) -> Iterator: + """Context manager for temporarily setting the current node.""" + if append: + self.current_node.append(node) + current_node = self.current_node + self.current_node = node + yield + self.current_node = current_node + + def render_children(self, token: SyntaxTreeNode) -> None: + """Render the children of a token.""" + for child in token.children or []: + if f"render_{child.type}" in self.rules: + self.rules[f"render_{child.type}"](child) + else: + self.create_warning( + f"No render method for: {child.type}", + line=token_line(child, default=0), + subtype="render", + append_to=self.current_node, + ) + + def add_line_and_source_path(self, node, token: SyntaxTreeNode) -> None: + """Copy the line number and document source path to the docutils node.""" + try: + node.line = token_line(token) + except ValueError: + pass + node.source = self.document["source"] + + def add_line_and_source_path_r( + self, nodes: list[nodes.Element], token: SyntaxTreeNode + ) -> None: + """Copy the line number and document source path to the docutils nodes, + and recursively to all descendants. + """ + for node in nodes: + self.add_line_and_source_path(node, token) + for child in findall(node)(): + self.add_line_and_source_path(child, token) + + def update_section_level_state(self, section: nodes.section, level: int) -> None: + """Update the section level state, with the new current section and level.""" + # find the closest parent section + parent_level = max( + section_level + for section_level in self._level_to_elem + if level > section_level + ) + parent = self._level_to_elem[parent_level] + + # if we are jumping up to a non-consecutive level, + # then warn about this, since this will not be propagated in the docutils AST + if (level > parent_level) and (parent_level + 1 != level): + msg = f"Non-consecutive header level increase; H{parent_level} to H{level}" + if parent_level == 0: + msg = f"Document headings start at H{level}, not H1" + self.create_warning( + msg, + line=section.line, + subtype="header", + append_to=self.current_node, + ) + + # append the new section to the parent + parent.append(section) + # update the state for this section level + self._level_to_elem[level] = section + + # Remove all descendant sections from the section level state + self._level_to_elem = { + section_level: section + for section_level, section in self._level_to_elem.items() + if section_level <= level + } + + def renderInlineAsText(self, tokens: list[SyntaxTreeNode]) -> str: + """Special kludge for image `alt` attributes to conform CommonMark spec. + + Don't try to use it! 
Spec requires to show `alt` content with stripped markup, + instead of simple escaping. + """ + result = "" + + for token in tokens or []: + if token.type == "text": + result += token.content + # elif token.type == "image": + # result += self.renderInlineAsText(token.children) + else: + result += self.renderInlineAsText(token.children or []) + return result + + # ### render methods for commonmark tokens + + def render_paragraph(self, token: SyntaxTreeNode) -> None: + para = nodes.paragraph(token.children[0].content if token.children else "") + self.add_line_and_source_path(para, token) + with self.current_node_context(para, append=True): + self.render_children(token) + + def render_inline(self, token: SyntaxTreeNode) -> None: + self.render_children(token) + + def render_text(self, token: SyntaxTreeNode) -> None: + self.current_node.append(nodes.Text(token.content)) + + def render_bullet_list(self, token: SyntaxTreeNode) -> None: + list_node = nodes.bullet_list() + if token.markup: + list_node["bullet"] = token.markup + if token.attrs.get("class"): + # this is used e.g. by tasklist + list_node["classes"] = str(token.attrs["class"]).split() + self.add_line_and_source_path(list_node, token) + with self.current_node_context(list_node, append=True): + self.render_children(token) + + def render_ordered_list(self, token: SyntaxTreeNode) -> None: + list_node = nodes.enumerated_list(enumtype="arabic", prefix="") + list_node["suffix"] = token.markup # for CommonMark, this should be "." or ")" + if "start" in token.attrs: # starting number + list_node["start"] = token.attrs["start"] + self.add_line_and_source_path(list_node, token) + with self.current_node_context(list_node, append=True): + self.render_children(token) + + def render_list_item(self, token: SyntaxTreeNode) -> None: + item_node = nodes.list_item() + if token.attrs.get("class"): + # this is used e.g. by tasklist + item_node["classes"] = str(token.attrs["class"]).split() + self.add_line_and_source_path(item_node, token) + with self.current_node_context(item_node, append=True): + self.render_children(token) + + def render_em(self, token: SyntaxTreeNode) -> None: + node = nodes.emphasis() + self.add_line_and_source_path(node, token) + with self.current_node_context(node, append=True): + self.render_children(token) + + def render_softbreak(self, token: SyntaxTreeNode) -> None: + self.current_node.append(nodes.Text("\n")) + + def render_hardbreak(self, token: SyntaxTreeNode) -> None: + self.current_node.append(nodes.raw("", "
\n", format="html")) + self.current_node.append(nodes.raw("", "\\\\\n", format="latex")) + + def render_strong(self, token: SyntaxTreeNode) -> None: + node = nodes.strong() + self.add_line_and_source_path(node, token) + with self.current_node_context(node, append=True): + self.render_children(token) + + def render_blockquote(self, token: SyntaxTreeNode) -> None: + quote = nodes.block_quote() + self.add_line_and_source_path(quote, token) + with self.current_node_context(quote, append=True): + self.render_children(token) + + def render_hr(self, token: SyntaxTreeNode) -> None: + node = nodes.transition() + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def render_code_inline(self, token: SyntaxTreeNode) -> None: + node = nodes.literal(token.content, token.content) + self.add_line_and_source_path(node, token) + self.current_node.append(node) + + def create_highlighted_code_block( + self, + text: str, + lexer_name: str | None, + number_lines: bool = False, + lineno_start: int = 1, + source: str | None = None, + line: int | None = None, + node_cls: type[nodes.Element] = nodes.literal_block, + ) -> nodes.Element: + """Create a literal block with syntax highlighting. + + This mimics the behaviour of the `code-block` directive. + + In docutils, this directive directly parses the text with the pygments lexer, + whereas in sphinx, the lexer name is only recorded as the `language` attribute, + and the text is lexed later by pygments within the `visit_literal_block` + method of the output format ``SphinxTranslator``. + + Note, this function does not add the literal block to the document. + """ + if self.sphinx_env is not None: + node = node_cls(text, text, language=lexer_name or "none") + if number_lines: + node["linenos"] = True + if lineno_start != 1: + node["highlight_args"] = {"linenostart": lineno_start} + else: + node = node_cls( + text, classes=["code"] + ([lexer_name] if lexer_name else []) + ) + try: + lex_tokens = Lexer( + text, + lexer_name or "", + "short" if self.md_config.highlight_code_blocks else "none", + ) + except LexerError as err: + self.reporter.warning( + str(err), + **{ + name: value + for name, value in (("source", source), ("line", line)) + if value is not None + }, + ) + lex_tokens = Lexer(text, lexer_name or "", "none") + + if number_lines: + lex_tokens = NumberLines( + lex_tokens, lineno_start, lineno_start + len(text.splitlines()) + ) + + for classes, value in lex_tokens: + if classes: + node += nodes.inline(value, value, classes=classes) + else: + # insert as Text to decrease the verbosity of the output + node += nodes.Text(value) + + if source is not None: + node.source = source + if line is not None: + node.line = line + return node + + def render_code_block(self, token: SyntaxTreeNode) -> None: + lexer = token.info.split()[0] if token.info else None + node = self.create_highlighted_code_block( + token.content, + lexer, + source=self.document["source"], + line=token_line(token, 0) or None, + ) + self.current_node.append(node) + + def render_fence(self, token: SyntaxTreeNode) -> None: + text = token.content + # Ensure that we'll have an empty string if info exists but is only spaces + info = token.info.strip() if token.info else token.info + language = info.split()[0] if info else "" + + if (not self.md_config.commonmark_only) and (not self.md_config.gfm_only): + if language == "{eval-rst}": + return self.render_restructuredtext(token) + if language.startswith("{") and language.endswith("}"): + return self.render_directive(token) + + if 
not language and self.sphinx_env is not None: + # use the current highlight setting, via the ``highlight`` directive, + # or ``highlight_language`` configuration. + language = self.sphinx_env.temp_data.get( + "highlight_language", self.sphinx_env.config.highlight_language + ) + + node = self.create_highlighted_code_block( + text, + language, + number_lines=language in self.md_config.number_code_blocks, + source=self.document["source"], + line=token_line(token, 0) or None, + ) + self.current_node.append(node) + + @property + def blocks_mathjax_processing(self) -> bool: + """Only add mathjax ignore classes if using sphinx, + and using the ``dollarmath`` extension, and ``myst_update_mathjax=True``. + """ + return ( + self.sphinx_env is not None + and "dollarmath" in self.md_config.enable_extensions + and self.md_config.update_mathjax + ) + + def render_heading(self, token: SyntaxTreeNode) -> None: + """Render a heading, e.g. `# Heading`.""" + + if self.md_env.get("match_titles", None) is False: + # this can occur if a nested parse is performed by a directive + # (such as an admonition) which contains a header. + # this would break the document structure + self.create_warning( + "Disallowed nested header found, converting to rubric", + line=token_line(token, default=0), + subtype="nested_header", + append_to=self.current_node, + ) + rubric = nodes.rubric(token.content, "") + self.add_line_and_source_path(rubric, token) + with self.current_node_context(rubric, append=True): + self.render_children(token) + return + + level = int(token.tag[1]) + + # create the section node + new_section = nodes.section() + self.add_line_and_source_path(new_section, token) + # if a top level section, + # then add classes to set default mathjax processing to false + # we then turn it back on, on a per-node basis + if level == 1 and self.blocks_mathjax_processing: + new_section["classes"].extend(["tex2jax_ignore", "mathjax_ignore"]) + + # update the state of the section levels + self.update_section_level_state(new_section, level) + + # create the title for this section + title_node = nodes.title(token.children[0].content if token.children else "") + self.add_line_and_source_path(title_node, token) + new_section.append(title_node) + # render the heading children into the title + with self.current_node_context(title_node): + self.render_children(token) + + # create a target reference for the section, based on the heading text + name = nodes.fully_normalize_name(title_node.astext()) + new_section["names"].append(name) + self.document.note_implicit_target(new_section, new_section) + + # set the section as the current node for subsequent rendering + self.current_node = new_section + + def render_link(self, token: SyntaxTreeNode) -> None: + """Parse `` or `[text](link "title")` syntax to docutils AST: + + - If `<>` autolink, forward to `render_autolink` + - If `myst_all_links_external` is True, forward to `render_external_url` + - If link is an external URL, forward to `render_external_url` + - External URLs start with a scheme (e.g. `http:`) in `myst_url_schemes`, + or any scheme if `myst_url_schemes` is None. 
+ - Otherwise, forward to `render_internal_link` + """ + if token.info == "auto": # handles both autolink and linkify + return self.render_autolink(token) + + if ( + self.md_config.commonmark_only + or self.md_config.gfm_only + or self.md_config.all_links_external + ): + return self.render_external_url(token) + + # Check for external URL + url_scheme = urlparse(cast(str, token.attrGet("href") or "")).scheme + allowed_url_schemes = self.md_config.url_schemes + if (allowed_url_schemes is None and url_scheme) or ( + allowed_url_schemes is not None and url_scheme in allowed_url_schemes + ): + return self.render_external_url(token) + + return self.render_internal_link(token) + + def render_external_url(self, token: SyntaxTreeNode) -> None: + """Render link token `[text](link "title")`, + where the link has been identified as an external URL:: + + + text + + `text` can contain nested syntax, e.g. `[**bold**](url "title")`. + """ + ref_node = nodes.reference() + self.add_line_and_source_path(ref_node, token) + ref_node["refuri"] = cast(str, token.attrGet("href") or "") + title = token.attrGet("title") + if title: + ref_node["title"] = title + with self.current_node_context(ref_node, append=True): + self.render_children(token) + + def render_internal_link(self, token: SyntaxTreeNode) -> None: + """Render link token `[text](link "title")`, + where the link has not been identified as an external URL:: + + + text + + `text` can contain nested syntax, e.g. `[**bold**](link "title")`. + + Note, this is overridden by `SphinxRenderer`, to use `pending_xref` nodes. + """ + ref_node = nodes.reference() + self.add_line_and_source_path(ref_node, token) + ref_node["refname"] = cast(str, token.attrGet("href") or "") + self.document.note_refname(ref_node) + title = token.attrGet("title") + if title: + ref_node["title"] = title + with self.current_node_context(ref_node, append=True): + self.render_children(token) + + def render_autolink(self, token: SyntaxTreeNode) -> None: + refuri = escapeHtml(token.attrGet("href") or "") # type: ignore[arg-type] + ref_node = nodes.reference() + ref_node["refuri"] = refuri + self.add_line_and_source_path(ref_node, token) + with self.current_node_context(ref_node, append=True): + self.render_children(token) + + def render_html_inline(self, token: SyntaxTreeNode) -> None: + self.render_html_block(token) + + def render_html_block(self, token: SyntaxTreeNode) -> None: + node_list = html_to_nodes(token.content, token_line(token), self) + self.current_node.extend(node_list) + + def render_image(self, token: SyntaxTreeNode) -> None: + img_node = nodes.image() + self.add_line_and_source_path(img_node, token) + destination = cast(str, token.attrGet("src") or "") + + if self.md_env.get("relative-images", None) is not None and not is_external_url( + destination, None, True + ): + # make the path relative to an "including" document + # this is set when using the `relative-images` option of the MyST `include` directive + destination = os.path.normpath( + os.path.join( + self.md_env.get("relative-images", ""), + os.path.normpath(destination), + ) + ) + + img_node["uri"] = destination + + img_node["alt"] = self.renderInlineAsText(token.children or []) + title = token.attrGet("title") + if title: + img_node["title"] = token.attrGet("title") + + # apply other attributes that can be set on the image + if "class" in token.attrs: + img_node["classes"].extend(str(token.attrs["class"]).split()) + if "width" in token.attrs: + try: + width = directives.length_or_percentage_or_unitless( + 
str(token.attrs["width"]) + ) + except ValueError: + self.create_warning( + f"Invalid width value for image: {token.attrs['width']!r}", + line=token_line(token, default=0), + subtype="image", + append_to=self.current_node, + ) + else: + img_node["width"] = width + if "height" in token.attrs: + try: + height = directives.length_or_unitless(str(token.attrs["height"])) + except ValueError: + self.create_warning( + f"Invalid height value for image: {token.attrs['height']!r}", + line=token_line(token, default=0), + subtype="image", + append_to=self.current_node, + ) + else: + img_node["height"] = height + if "align" in token.attrs: + if token.attrs["align"] not in ("left", "center", "right"): + self.create_warning( + f"Invalid align value for image: {token.attrs['align']!r}", + line=token_line(token, default=0), + subtype="image", + append_to=self.current_node, + ) + else: + img_node["align"] = token.attrs["align"] + if "id" in token.attrs: + name = nodes.fully_normalize_name(str(token.attrs["id"])) + img_node["names"].append(name) + self.document.note_explicit_target(img_node, img_node) + + self.current_node.append(img_node) + + # ### render methods for plugin tokens + + def render_front_matter(self, token: SyntaxTreeNode) -> None: + """Pass document front matter data.""" + position = token_line(token, default=0) + + if isinstance(token.content, str): + try: + data = yaml.safe_load(token.content) + except (yaml.parser.ParserError, yaml.scanner.ScannerError): + self.create_warning( + "Malformed YAML", + line=position, + append_to=self.current_node, + subtype="topmatter", + ) + return + else: + data = token.content + + if not isinstance(data, dict): + self.create_warning( + f"YAML is not a dict: {type(data)}", + line=position, + append_to=self.current_node, + subtype="topmatter", + ) + return + + fields = { + k: v + for k, v in data.items() + if k not in ("myst", "mystnb", "substitutions", "html_meta") + } + if fields: + field_list = self.dict_to_fm_field_list( + fields, language_code=self.document.settings.language_code + ) + self.current_node.append(field_list) + + if data.get("title") and self.md_config.title_to_header: + self.nested_render_text(f"# {data['title']}", 0) + + def dict_to_fm_field_list( + self, data: dict[str, Any], language_code: str, line: int = 0 + ) -> nodes.field_list: + """Render each key/val pair as a docutils ``field_node``. + + Bibliographic keys below will be parsed as Markdown, + all others will be left as literal text. + + The field list should be at the start of the document, + and will then be converted to a `docinfo` node during the + `docutils.docutils.transforms.frontmatter.DocInfo` transform (priority 340), + and bibliographic keys (or their translation) will be converted to nodes:: + + {'author': docutils.nodes.author, + 'authors': docutils.nodes.authors, + 'organization': docutils.nodes.organization, + 'address': docutils.nodes.address, + 'contact': docutils.nodes.contact, + 'version': docutils.nodes.version, + 'revision': docutils.nodes.revision, + 'status': docutils.nodes.status, + 'date': docutils.nodes.date, + 'copyright': docutils.nodes.copyright, + 'dedication': docutils.nodes.topic, + 'abstract': docutils.nodes.topic} + + Also, the 'dedication' and 'abstract' will be placed outside the `docinfo`, + and so will always be shown in the document. + + If using sphinx, this `docinfo` node will later be extracted from the AST, + by the `DoctreeReadEvent` transform (priority 880), + calling `MetadataCollector.process_doc`. 
+ In this case keys and values will be converted to strings and stored in + `app.env.metadata[app.env.docname]` + + See + https://www.sphinx-doc.org/en/master/usage/restructuredtext/field-lists.html + for docinfo fields used by sphinx. + + """ + field_list = nodes.field_list() + field_list.source, field_list.line = self.document["source"], line + + bibliofields = get_language(language_code).bibliographic_fields + + for key, value in data.items(): + if not isinstance(value, (str, int, float, date, datetime)): + value = json.dumps(value) + value = str(value) + body = nodes.paragraph() + body.source, body.line = self.document["source"], line + if key in bibliofields: + with self.current_node_context(body): + self.nested_render_text(value, line, inline=True) + else: + body += nodes.literal(value, value) + + field_node = nodes.field() + field_node.source = value + field_node += nodes.field_name(key, "", nodes.Text(key)) + field_node += nodes.field_body(value, *[body]) + field_list += field_node + + return field_list + + def render_table(self, token: SyntaxTreeNode) -> None: + + # markdown-it table always contains at least a header: + assert token.children + header = token.children[0] + # with one header row + assert header.children + header_row = header.children[0] + assert header_row.children + + # top-level element + table = nodes.table() + table["classes"] += ["colwidths-auto"] + self.add_line_and_source_path(table, token) + self.current_node.append(table) + + # column settings element + maxcols = len(header_row.children) + colwidths = [100 // maxcols] * maxcols + tgroup = nodes.tgroup(cols=len(colwidths)) + table += tgroup + for colwidth in colwidths: + colspec = nodes.colspec(colwidth=colwidth) + tgroup += colspec + + # header + thead = nodes.thead() + tgroup += thead + with self.current_node_context(thead): + self.render_table_row(header_row) + + # body + if len(token.children) > 1: + body = token.children[1] + tbody = nodes.tbody() + tgroup += tbody + with self.current_node_context(tbody): + for body_row in body.children or []: + self.render_table_row(body_row) + + def render_table_row(self, token: SyntaxTreeNode) -> None: + row = nodes.row() + with self.current_node_context(row, append=True): + for child in token.children or []: + entry = nodes.entry() + para = nodes.paragraph( + child.children[0].content if child.children else "" + ) + style = child.attrGet("style") # i.e. the alignment when using e.g. 
:--
+                if style and style in (
+                    "text-align:left",
+                    "text-align:right",
+                    "text-align:center",
+                ):
+                    entry["classes"].append(f"text-{cast(str, style).split(':')[1]}")
+                with self.current_node_context(entry, append=True):
+                    with self.current_node_context(para, append=True):
+                        self.render_children(child)
+
+    def render_s(self, token: SyntaxTreeNode) -> None:
+        """Render a strikethrough token."""
+        # TODO strikethrough not currently directly supported in docutils
+        self.create_warning(
+            "Strikethrough is currently only supported in HTML output",
+            line=token_line(token, 0),
+            subtype="strikethrough",
+            append_to=self.current_node,
+        )
+        self.current_node.append(nodes.raw("", "<s>", format="html"))
+        self.render_children(token)
+        self.current_node.append(nodes.raw("", "</s>", format="html"))
+
+    def render_math_inline(self, token: SyntaxTreeNode) -> None:
+        content = token.content
+        node = nodes.math(content, content)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def render_math_inline_double(self, token: SyntaxTreeNode) -> None:
+        content = token.content
+        node = nodes.math_block(content, content, nowrap=False, number=None)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def render_math_single(self, token: SyntaxTreeNode) -> None:
+        content = token.content
+        node = nodes.math(content, content)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def render_math_block(self, token: SyntaxTreeNode) -> None:
+        content = token.content
+        node = nodes.math_block(content, content, nowrap=False, number=None)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def render_amsmath(self, token: SyntaxTreeNode) -> None:
+        # note docutils does not currently support the nowrap attribute
+        # or equation numbering, so this is overridden in the sphinx renderer
+        node = nodes.math_block(
+            token.content, token.content, nowrap=True, classes=["amsmath"]
+        )
+        if token.meta["numbered"] != "*":
+            node["numbered"] = True
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def render_footnote_ref(self, token: SyntaxTreeNode) -> None:
+        """Footnote references are added as auto-numbered,
+        i.e.
`[^a]` is read as rST `[#a]_` + """ + target = token.meta["label"] + + refnode = nodes.footnote_reference(f"[^{target}]") + self.add_line_and_source_path(refnode, token) + if not target.isdigit(): + refnode["auto"] = 1 + self.document.note_autofootnote_ref(refnode) + else: + refnode += nodes.Text(target) + + refnode["refname"] = target + self.document.note_footnote_ref(refnode) + + self.current_node.append(refnode) + + def render_footnote_reference(self, token: SyntaxTreeNode) -> None: + target = token.meta["label"] + + footnote = nodes.footnote() + self.add_line_and_source_path(footnote, token) + footnote["names"].append(target) + if not target.isdigit(): + footnote["auto"] = 1 + self.document.note_autofootnote(footnote) + else: + footnote += nodes.label("", target) + self.document.note_footnote(footnote) + self.document.note_explicit_target(footnote, footnote) + with self.current_node_context(footnote, append=True): + self.render_children(token) + + def render_myst_block_break(self, token: SyntaxTreeNode) -> None: + block_break = nodes.comment(token.content, token.content) + block_break["classes"] += ["block_break"] + self.add_line_and_source_path(block_break, token) + self.current_node.append(block_break) + + def render_myst_target(self, token: SyntaxTreeNode) -> None: + text = token.content + name = nodes.fully_normalize_name(text) + target = nodes.target(text) + target["names"].append(name) + self.add_line_and_source_path(target, token) + self.document.note_explicit_target(target, self.current_node) + self.current_node.append(target) + + def render_myst_line_comment(self, token: SyntaxTreeNode) -> None: + self.current_node.append(nodes.comment(token.content, token.content.strip())) + + def render_myst_role(self, token: SyntaxTreeNode) -> None: + name = token.meta["name"] + text = token.content + rawsource = f":{name}:`{token.content}`" + lineno = token_line(token) if token.map else 0 + role_func, messages = roles.role( + name, self.language_module_rst, lineno, self.reporter + ) + inliner = MockInliner(self) + if role_func: + nodes, messages2 = role_func(name, rawsource, text, lineno, inliner) + # return nodes, messages + messages2 + self.current_node += nodes + else: + message = self.reporter.error( + f'Unknown interpreted text role "{name}".', line=lineno + ) + problematic = inliner.problematic(text, rawsource, message) + self.current_node += problematic + + def render_colon_fence(self, token: SyntaxTreeNode) -> None: + """Render a code fence with ``:`` colon delimiters.""" + + if token.content.startswith(":::"): + # the content starts with a nested fence block, + # but must distinguish between ``:options:``, so we add a new line + assert token.token is not None, '"colon_fence" must have a `token`' + linear_token = token.token.copy() + linear_token.content = "\n" + linear_token.content + token.token = linear_token + + return self.render_fence(token) + + def render_dl(self, token: SyntaxTreeNode) -> None: + """Render a definition list.""" + node = nodes.definition_list(classes=["simple", "myst"]) + self.add_line_and_source_path(node, token) + with self.current_node_context(node, append=True): + item = None + for child in token.children or []: + if child.type == "dt": + item = nodes.definition_list_item() + self.add_line_and_source_path(item, child) + with self.current_node_context(item, append=True): + term = nodes.term( + child.children[0].content if child.children else "" + ) + self.add_line_and_source_path(term, child) + with self.current_node_context(term, append=True): + 
self.render_children(child)
+                elif child.type == "dd":
+                    if item is None:
+                        error = self.reporter.error(
+                            (
+                                "Found a definition in a definition list, "
+                                "with no preceding term"
+                            ),
+                            # nodes.literal_block(content, content),
+                            line=token_line(child),
+                        )
+                        self.current_node += [error]
+                        # there is no term to attach this definition to, so skip it
+                        continue
+                    with self.current_node_context(item):
+                        definition = nodes.definition()
+                        self.add_line_and_source_path(definition, child)
+                        with self.current_node_context(definition, append=True):
+                            self.render_children(child)
+                else:
+                    error_msg = self.reporter.error(
+                        (
+                            "Expected a term/definition as a child of a definition list"
+                            f", but found a: {child.type}"
+                        ),
+                        # nodes.literal_block(content, content),
+                        line=token_line(child),
+                    )
+                    self.current_node += [error_msg]
+
+    def render_field_list(self, token: SyntaxTreeNode) -> None:
+        """Render a field list."""
+        field_list = nodes.field_list(classes=["myst"])
+        self.add_line_and_source_path(field_list, token)
+        with self.current_node_context(field_list, append=True):
+            # raise ValueError(token.pretty(show_text=True))
+            children = (token.children or [])[:]
+            while children:
+                child = children.pop(0)
+                if not child.type == "fieldlist_name":
+                    error_msg = self.reporter.error(
+                        (
+                            "Expected a fieldlist_name as a child of a field_list"
+                            f", but found a: {child.type}"
+                        ),
+                        # nodes.literal_block(content, content),
+                        line=token_line(child),
+                    )
+                    self.current_node += [error_msg]
+                    break
+                field = nodes.field()
+                self.add_line_and_source_path(field, child)
+                field_list += field
+                field_name = nodes.field_name()
+                self.add_line_and_source_path(field_name, child)
+                field += field_name
+                with self.current_node_context(field_name):
+                    self.render_children(child)
+                field_body = nodes.field_body()
+                self.add_line_and_source_path(field_body, child)
+                field += field_body
+                if children and children[0].type == "fieldlist_body":
+                    child = children.pop(0)
+                    with self.current_node_context(field_body):
+                        self.render_children(child)
+
+    def render_restructuredtext(self, token: SyntaxTreeNode) -> None:
+        """Render the content of the token as restructuredtext."""
+        # copy necessary elements (source, line no, env, reporter)
+        newdoc = make_document()
+        newdoc["source"] = self.document["source"]
+        newdoc.settings = self.document.settings
+        newdoc.reporter = self.reporter
+        # pad the line numbers artificially so they offset with the fence block
+        pseudosource = ("\n" * token_line(token)) + token.content
+        # actually parse the rst into our document
+        MockRSTParser().parse(pseudosource, newdoc)
+        for node in newdoc:
+            if node["names"]:
+                self.document.note_explicit_target(node, node)
+        self.current_node.extend(newdoc.children)
+
+    def render_directive(self, token: SyntaxTreeNode) -> None:
+        """Render special fenced code blocks as directives."""
+        first_line = token.info.split(maxsplit=1)
+        name = first_line[0][1:-1]
+        arguments = "" if len(first_line) == 1 else first_line[1]
+        content = token.content
+        position = token_line(token)
+        nodes_list = self.run_directive(name, arguments, content, position)
+        self.current_node += nodes_list
+
+    def run_directive(
+        self, name: str, first_line: str, content: str, position: int
+    ) -> list[nodes.Element]:
+        """Run a directive and return the generated nodes.
+
+        :param name: the name of the directive
+        :param first_line: The text on the same line as the directive name.
+            May be an argument or body text, dependent on the directive
+        :param content: All text after the first line. Can include options.
+ :param position: The line number of the first line + + """ + # TODO directive name white/black lists + + self.document.current_line = position + + # get directive class + output: tuple[Directive, list] = directives.directive( + name, self.language_module_rst, self.document + ) + directive_class, messages = output + if not directive_class: + error = self.reporter.error( + f'Unknown directive type "{name}".\n', + # nodes.literal_block(content, content), + line=position, + ) + return [error] + messages + + if issubclass(directive_class, Include): + # this is a Markdown only option, + # to allow for altering relative image reference links + directive_class.option_spec["relative-images"] = directives.flag + directive_class.option_spec["relative-docs"] = directives.path + + try: + arguments, options, body_lines, content_offset = parse_directive_text( + directive_class, first_line, content + ) + except DirectiveParsingError as error: + error = self.reporter.error( + f"Directive '{name}': {error}", + nodes.literal_block(content, content), + line=position, + ) + return [error] + + # initialise directive + if issubclass(directive_class, Include): + directive_instance = MockIncludeDirective( + self, + name=name, + klass=directive_class, + arguments=arguments, + options=options, + body=body_lines, + lineno=position, + ) + else: + state_machine = MockStateMachine(self, position) + state = MockState(self, state_machine, position) + directive_instance = directive_class( + name=name, + # the list of positional arguments + arguments=arguments, + # a dictionary mapping option names to values + options=options, + # the directive content line by line + content=StringList(body_lines, self.document["source"]), + # the absolute line number of the first line of the directive + lineno=position, + # the line offset of the first line of the content + content_offset=content_offset, + # a string containing the entire directive + block_text="\n".join(body_lines), + state=state, + state_machine=state_machine, + ) + + # run directive + try: + result = directive_instance.run() + except DirectiveError as error: + msg_node = self.reporter.system_message( + error.level, error.msg, line=position + ) + msg_node += nodes.literal_block(content, content) + result = [msg_node] + except MockingError as exc: + error_msg = self.reporter.error( + "Directive '{}' cannot be mocked: {}: {}".format( + name, exc.__class__.__name__, exc + ), + nodes.literal_block(content, content), + line=position, + ) + return [error_msg] + + assert isinstance( + result, list + ), f'Directive "{name}" must return a list of nodes.' + for i in range(len(result)): + assert isinstance( + result[i], nodes.Node + ), 'Directive "{}" returned non-Node object (index {}): {}'.format( + name, i, result[i] + ) + return result + + def render_substitution_inline(self, token: SyntaxTreeNode) -> None: + """Render inline substitution {{key}}.""" + self.render_substitution(token, inline=True) + + def render_substitution_block(self, token: SyntaxTreeNode) -> None: + """Render block substitution {{key}}.""" + self.render_substitution(token, inline=False) + + def render_substitution(self, token: SyntaxTreeNode, inline: bool) -> None: + """Substitutions are rendered by: + + 1. Combining global substitutions with front-matter substitutions + to create a variable context (front-matter takes priority) + 2. Add the sphinx `env` to the variable context (if available) + 3. Create the string content with Jinja2 (passing it the variable context) + 4. 
If the substitution is inline and not a directive, + parse to nodes ignoring block syntaxes (like lists or block-quotes), + otherwise parse to nodes with all syntax rules. + + """ + position = token_line(token) + + # front-matter substitutions take priority over config ones + variable_context: dict[str, Any] = {**self.md_config.substitutions} + if self.sphinx_env is not None: + variable_context["env"] = self.sphinx_env + + # fail on undefined variables + env = jinja2.Environment(undefined=jinja2.StrictUndefined) + + # try rendering + try: + rendered = env.from_string(f"{{{{{token.content}}}}}").render( + variable_context + ) + except Exception as error: + error_msg = self.reporter.error( + f"Substitution error:{error.__class__.__name__}: {error}", + line=position, + ) + self.current_node += [error_msg] + return + + # handle circular references + ast = env.parse(f"{{{{{token.content}}}}}") + references = { + n.name for n in ast.find_all(jinja2.nodes.Name) if n.name != "env" + } + self.document.sub_references = getattr(self.document, "sub_references", set()) + cyclic = references.intersection(self.document.sub_references) + if cyclic: + error_msg = self.reporter.error( + f"circular substitution reference: {cyclic}", + line=position, + ) + self.current_node += [error_msg] + return + + # TODO improve error reporting; + # at present, for a multi-line substitution, + # an error may point to a line lower than the substitution + # should it point to the source of the substitution? + # or the error message should at least indicate that its a substitution + + # we record used references before nested parsing, then remove them after + self.document.sub_references.update(references) + try: + if inline and not REGEX_DIRECTIVE_START.match(rendered): + self.nested_render_text(rendered, position, inline=True) + else: + self.nested_render_text(rendered, position, allow_headings=False) + finally: + self.document.sub_references.difference_update(references) + + +def html_meta_to_nodes( + data: dict[str, Any], document: nodes.document, line: int, reporter: Reporter +) -> list[nodes.pending | nodes.system_message]: + """Replicate the `meta` directive, + by converting a dictionary to a list of pending meta nodes + + See: + https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#html-metadata + """ + if not data: + return [] + + try: + # if sphinx available + from sphinx.addnodes import meta as meta_cls + except ImportError: + try: + # docutils >= 0.19 + meta_cls = nodes.meta # type: ignore + except AttributeError: + from docutils.parsers.rst.directives.html import MetaBody + + meta_cls = MetaBody.meta # type: ignore + + output = [] + + for key, value in data.items(): + content = str(value or "") + meta_node = meta_cls(content) + meta_node.source = document["source"] + meta_node.line = line + meta_node["content"] = content + try: + if not content: + raise ValueError("No content") + for i, key_part in enumerate(key.split()): + if "=" not in key_part and i == 0: + meta_node["name"] = key_part + continue + if "=" not in key_part: + raise ValueError(f"no '=' in {key_part}") + attr_name, attr_val = key_part.split("=", 1) + if not (attr_name and attr_val): + raise ValueError(f"malformed {key_part}") + meta_node[attr_name.lower()] = attr_val + except ValueError as error: + msg = reporter.error(f'Error parsing meta tag attribute "{key}": {error}.') + output.append(msg) + continue + + pending = nodes.pending( + Filter, + {"component": "writer", "format": "html", "nodes": [meta_node]}, + ) + 
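+        # note the pending node, so that the docutils ``Filter`` transform is
+        # applied when the document is written, keeping the meta node only for
+        # writers that support the "html" format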
document.note_pending(pending)
+        output.append(pending)
+
+    return output
diff --git a/myst_parser/mdit_to_docutils/html_to_nodes.py b/myst_parser/mdit_to_docutils/html_to_nodes.py
new file mode 100644
index 0000000..2cc3066
--- /dev/null
+++ b/myst_parser/mdit_to_docutils/html_to_nodes.py
@@ -0,0 +1,139 @@
+"""Convert HTML to docutils nodes."""
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+
+from docutils import nodes
+
+from myst_parser.parsers.parse_html import Data, tokenize_html
+
+if TYPE_CHECKING:
+    from .base import DocutilsRenderer
+
+
+def make_error(
+    document: nodes.document, error_msg: str, text: str, line_number: int
+) -> nodes.system_message:
+    return document.reporter.error(
+        error_msg,
+        nodes.literal_block(text, text),
+        line=line_number,
+    )
+
+
+OPTION_KEYS_IMAGE = {"class", "alt", "height", "width", "align", "name"}
+# note: docutils also has scale and target
+
+OPTION_KEYS_ADMONITION = {"class", "name"}
+
+# See https://github.com/micromark/micromark-extension-gfm-tagfilter
+RE_FLOW = re.compile(
+    r"<(\/?)(iframe|noembed|noframes|plaintext|script|style|title|textarea|xmp)(?=[\t\n\f\r />])",
+    re.IGNORECASE,
+)
+
+
+def default_html(text: str, source: str, line_number: int) -> list[nodes.Element]:
+    raw_html = nodes.raw("", text, format="html")
+    raw_html.source = source
+    raw_html.line = line_number
+    return [raw_html]
+
+
+def html_to_nodes(
+    text: str, line_number: int, renderer: DocutilsRenderer
+) -> list[nodes.Element]:
+    """Convert HTML to docutils nodes."""
+    if renderer.md_config.gfm_only:
+        text, _ = RE_FLOW.subn(lambda s: s.group(0).replace("<", "&lt;"), text)
+
+    enable_html_img = "html_image" in renderer.md_config.enable_extensions
+    enable_html_admonition = "html_admonition" in renderer.md_config.enable_extensions
+    if not (enable_html_img or enable_html_admonition):
+        return default_html(text, renderer.document["source"], line_number)
+
+    # parse the HTML to AST
+    try:
+        root = tokenize_html(text).strip(inplace=True, recurse=False)
+    except Exception:
+        msg_node = renderer.create_warning(
+            "HTML could not be parsed", line=line_number, subtype="html"
+        )
+        return ([msg_node] if msg_node else []) + default_html(
+            text, renderer.document["source"], line_number
+        )
+
+    if len(root) < 1:
+        # if empty
+        return default_html(text, renderer.document["source"], line_number)
+
+    if not all(
+        (enable_html_img and child.name == "img")
+        or (
+            enable_html_admonition
+            and child.name == "div"
+            and "admonition" in child.attrs.classes
+        )
+        for child in root
+    ):
+        return default_html(text, renderer.document["source"], line_number)
+
+    nodes_list = []
+    for child in root:
+
+        if child.name == "img":
+            if "src" not in child.attrs:
+                return [
+                    renderer.reporter.error(
+                        "<img> missing 'src' attribute", line=line_number
+                    )
+                ]
+            content = "\n".join(
+                f":{k}: {v}"
+                for k, v in sorted(child.attrs.items())
+                if k in OPTION_KEYS_IMAGE
+            )
+            nodes_list.extend(
+                renderer.run_directive(
+                    "image", child.attrs["src"], content, line_number
+                )
+            )
+
+        else:
+            children = child.strip().children
+            if (
+                children
+                and children[0].name in ("div", "p")
+                and (
+                    "title" in children[0].attrs.classes
+                    or "admonition-title" in children[0].attrs.classes
+                )
+            ):
+                title = "".join(child.render() for child in children.pop(0))
+            else:
+                title = "Note"
+
+            options = "\n".join(
+                f":{k}: {v}"
+                for k, v in sorted(child.attrs.items())
+                if k in OPTION_KEYS_ADMONITION
+            ).rstrip()
+            new_children = []
+            for child in children:
+                if child.name == "p":
new_children.extend(child.children) + new_children.append(Data("\n\n")) + else: + new_children.append(child) + content = ( + options + + ("\n\n" if options else "") + + "".join(child.render() for child in new_children).lstrip() + ) + + nodes_list.extend( + renderer.run_directive("admonition", title, content, line_number) + ) + + return nodes_list diff --git a/myst_parser/mdit_to_docutils/sphinx_.py b/myst_parser/mdit_to_docutils/sphinx_.py new file mode 100644 index 0000000..3c1bc23 --- /dev/null +++ b/myst_parser/mdit_to_docutils/sphinx_.py @@ -0,0 +1,245 @@ +"""Convert Markdown-it tokens to docutils nodes, including sphinx specific elements.""" +from __future__ import annotations + +import os +from pathlib import Path +from typing import cast +from urllib.parse import unquote +from uuid import uuid4 + +from docutils import nodes +from markdown_it.tree import SyntaxTreeNode +from sphinx import addnodes +from sphinx.domains.math import MathDomain +from sphinx.domains.std import StandardDomain +from sphinx.environment import BuildEnvironment +from sphinx.util import logging +from sphinx.util.nodes import clean_astext + +from myst_parser.mdit_to_docutils.base import DocutilsRenderer + +LOGGER = logging.getLogger(__name__) + + +def create_warning( + document: nodes.document, + message: str, + *, + line: int | None = None, + append_to: nodes.Element | None = None, + wtype: str = "myst", + subtype: str = "other", +) -> nodes.system_message | None: + """Generate a warning, logging it if necessary. + + If the warning type is listed in the ``suppress_warnings`` configuration, + then ``None`` will be returned and no warning logged. + """ + message = f"{message} [{wtype}.{subtype}]" + kwargs = {"line": line} if line is not None else {} + + if logging.is_suppressed_warning( + wtype, subtype, document.settings.env.app.config.suppress_warnings + ): + return None + + msg_node = document.reporter.warning(message, **kwargs) + if append_to is not None: + append_to.append(msg_node) + + return None + + +class SphinxRenderer(DocutilsRenderer): + """A markdown-it-py renderer to populate (in-place) a `docutils.document` AST. + + This is sub-class of `DocutilsRenderer` that handles sphinx specific aspects, + such as cross-referencing. + """ + + @property + def doc_env(self) -> BuildEnvironment: + return self.document.settings.env + + def create_warning( + self, + message: str, + *, + line: int | None = None, + append_to: nodes.Element | None = None, + wtype: str = "myst", + subtype: str = "other", + ) -> nodes.system_message | None: + """Generate a warning, logging it if necessary. + + If the warning type is listed in the ``suppress_warnings`` configuration, + then ``None`` will be returned and no warning logged. + """ + return create_warning( + self.document, + message, + line=line, + append_to=append_to, + wtype=wtype, + subtype=subtype, + ) + + def render_internal_link(self, token: SyntaxTreeNode) -> None: + """Render link token `[text](link "title")`, + where the link has not been identified as an external URL. 
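+
+        For example (an illustrative sketch of the logic below): a link to
+        another source document, such as ``[text](target.md)``, is rendered
+        as a ``pending_xref`` to be resolved by sphinx, whereas a link to a
+        non-source file, such as ``[text](data.csv)``, is rendered as a
+        ``download_reference``.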
+ """ + destination = unquote(cast(str, token.attrGet("href") or "")) + + # make the path relative to an "including" document + # this is set when using the `relative-docs` option of the MyST `include` directive + relative_include = self.md_env.get("relative-docs", None) + if relative_include is not None and destination.startswith(relative_include[0]): + source_dir, include_dir = relative_include[1:] + destination = os.path.relpath( + os.path.join(include_dir, os.path.normpath(destination)), source_dir + ) + + potential_path = ( + Path(self.doc_env.doc2path(self.doc_env.docname)).parent / destination + if self.doc_env.srcdir # not set in some test situations + else None + ) + if ( + potential_path + and potential_path.is_file() + and not any( + destination.endswith(suffix) + for suffix in self.doc_env.config.source_suffix + ) + ): + wrap_node = addnodes.download_reference( + refdoc=self.doc_env.docname, + reftarget=destination, + reftype="myst", + refdomain=None, # Added to enable cross-linking + refexplicit=len(token.children or []) > 0, + refwarn=False, + ) + classes = ["xref", "download", "myst"] + text = destination if not token.children else "" + else: + wrap_node = addnodes.pending_xref( + refdoc=self.doc_env.docname, + reftarget=destination, + reftype="myst", + refdomain=None, # Added to enable cross-linking + refexplicit=len(token.children or []) > 0, + refwarn=True, + ) + classes = ["xref", "myst"] + text = "" + + self.add_line_and_source_path(wrap_node, token) + title = token.attrGet("title") + if title: + wrap_node["title"] = title + self.current_node.append(wrap_node) + + inner_node = nodes.inline("", text, classes=classes) + wrap_node.append(inner_node) + with self.current_node_context(inner_node): + self.render_children(token) + + def render_heading(self, token: SyntaxTreeNode) -> None: + """This extends the docutils method, to allow for the addition of heading ids. + These ids are computed by the ``markdown-it-py`` ``anchors_plugin`` + as "slugs" which are unique to a document. + + The approach is similar to ``sphinx.ext.autosectionlabel`` + """ + super().render_heading(token) + + if not isinstance(self.current_node, nodes.section): + return + + # create the slug string + slug = cast(str, token.attrGet("id")) + if slug is None: + return + + section = self.current_node + doc_slug = self.doc_env.doc2path(self.doc_env.docname, base=False) + "#" + slug + + # save the reference in the standard domain, so that it can be handled properly + domain = cast(StandardDomain, self.doc_env.get_domain("std")) + if doc_slug in domain.labels: + other_doc = self.doc_env.doc2path(domain.labels[doc_slug][0]) + self.create_warning( + f"duplicate label {doc_slug}, other instance in {other_doc}", + line=section.line, + subtype="anchor", + ) + labelid = section["ids"][0] + domain.anonlabels[doc_slug] = self.doc_env.docname, labelid + domain.labels[doc_slug] = ( + self.doc_env.docname, + labelid, + clean_astext(section[0]), + ) + + self.doc_env.metadata[self.doc_env.docname]["myst_anchors"] = True + section["myst-anchor"] = doc_slug + + def render_math_block_label(self, token: SyntaxTreeNode) -> None: + """Render math with referencable labels, e.g. 
``$a=1$ (label)``."""
+        label = token.info
+        content = token.content
+        node = nodes.math_block(
+            content, content, nowrap=False, number=None, label=label
+        )
+        target = self.add_math_target(node)
+        self.add_line_and_source_path(target, token)
+        self.current_node.append(target)
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def _random_label(self) -> str:
+        return str(uuid4())
+
+    def render_amsmath(self, token: SyntaxTreeNode) -> None:
+        """Renderer for the amsmath extension."""
+        # environment = token.meta["environment"]
+        content = token.content
+
+        if token.meta["numbered"] != "*":
+            # TODO how to parse and reference labels within environment?
+            # for now we create a unique hash, so the equation will be numbered
+            # but there will be no reference clashes
+            label = self._random_label()
+            node = nodes.math_block(
+                content,
+                content,
+                nowrap=True,
+                number=None,
+                classes=["amsmath"],
+                label=label,
+            )
+            target = self.add_math_target(node)
+            self.add_line_and_source_path(target, token)
+            self.current_node.append(target)
+        else:
+            node = nodes.math_block(
+                content, content, nowrap=True, number=None, classes=["amsmath"]
+            )
+        self.add_line_and_source_path(node, token)
+        self.current_node.append(node)
+
+    def add_math_target(self, node: nodes.math_block) -> nodes.target:
+        # Code mainly copied from sphinx.directives.patches.MathDirective
+
+        # register label to domain
+        domain = cast(MathDomain, self.doc_env.get_domain("math"))
+        domain.note_equation(self.doc_env.docname, node["label"], location=node)
+        node["number"] = domain.get_equation_number_for(node["label"])
+        node["docname"] = self.doc_env.docname
+
+        # create target node
+        node_id = nodes.make_id("equation-%s" % node["label"])
+        target = nodes.target("", "", ids=[node_id])
+        self.document.note_explicit_target(target)
+        return target
diff --git a/myst_parser/mdit_to_docutils/utils.py b/myst_parser/mdit_to_docutils/utils.py
new file mode 100644
index 0000000..b31d8c7
--- /dev/null
+++ b/myst_parser/mdit_to_docutils/utils.py
@@ -0,0 +1,36 @@
+import html
+from typing import Iterable, Optional
+from urllib.parse import quote, urlparse
+
+
+def escape_url(raw: str) -> str:
+    """
+    Escape urls to prevent code injection craziness. (Hopefully.)
+    """
+    return html.escape(quote(html.unescape(raw), safe="/#:()*?=%@+,&"))
+
+
+def is_external_url(
+    reference: str,
+    known_url_schemes: Optional[Iterable[str]],
+    match_fragment: bool = False,
+) -> bool:
+    """Return if a reference should be recognised as an external URL.
+
+    URLs are of the format: scheme://netloc/path;parameters?query#fragment
+
+    This checks if there is a url scheme (e.g. 'https') and, if so,
+    if the scheme is in the list of known_url_schemes (if supplied).
+
+    :param known_url_schemes: e.g.
["http", "https", "mailto"] + If None, match all schemes + :param match_fragment: If True and a fragment found, then True will be returned, + irrespective of a scheme match + + """ + url_check = urlparse(reference) + if known_url_schemes is not None: + scheme_known = url_check.scheme in known_url_schemes + else: + scheme_known = bool(url_check.scheme) + return scheme_known or (match_fragment and url_check.fragment != "") diff --git a/myst_parser/mocking.py b/myst_parser/mocking.py new file mode 100644 index 0000000..b22475d --- /dev/null +++ b/myst_parser/mocking.py @@ -0,0 +1,514 @@ +"""This module provides classes to Mock the core components of the docutils.RSTParser, +the key difference being that nested parsing treats the text as Markdown not rST. +""" +from __future__ import annotations + +import os +import re +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from docutils import nodes +from docutils.parsers.rst import Directive, DirectiveError +from docutils.parsers.rst import Parser as RSTParser +from docutils.parsers.rst.directives.misc import Include +from docutils.parsers.rst.states import Body, Inliner, RSTStateMachine +from docutils.statemachine import StringList +from docutils.utils import unescape + +from .parsers.directives import parse_directive_text + +if TYPE_CHECKING: + from .mdit_to_docutils.base import DocutilsRenderer + + +class MockingError(Exception): + """An exception to signal an error during mocking of docutils components.""" + + +class MockInliner: + """A mock version of `docutils.parsers.rst.states.Inliner`. + + This is parsed to role functions. + """ + + def __init__(self, renderer: DocutilsRenderer): + """Initialize the mock inliner.""" + self._renderer = renderer + # here we mock that the `parse` method has already been called + # which is where these attributes are set (via the RST state Memo) + self.document = renderer.document + self.reporter = renderer.document.reporter + self.language = renderer.language_module_rst + self.parent = renderer.current_node + + if not hasattr(self.reporter, "get_source_and_line"): + # In docutils this is set by `RSTState.runtime_init` + self.reporter.get_source_and_line = lambda l: (self.document["source"], l) + + self.rfc_url = "rfc%d.html" + + def problematic( + self, text: str, rawsource: str, message: nodes.system_message + ) -> nodes.problematic: + """Record a system message from parsing.""" + msgid = self.document.set_id(message, self.parent) + problematic = nodes.problematic(rawsource, text, refid=msgid) + prbid = self.document.set_id(problematic) + message.add_backref(prbid) + return problematic + + def parse( + self, text: str, lineno: int, memo: Any, parent: nodes.Node + ) -> tuple[list[nodes.Node], list[nodes.system_message]]: + """Parse the text and return a list of nodes.""" + # note the only place this is normally called, + # is by `RSTState.inline_text`, or in directives: `self.state.inline_text`, + # and there the state parses its own parent + # self.reporter = memo.reporter + # self.document = memo.document + # self.language = memo.language + with self._renderer.current_node_context(parent): + # the parent is never actually appended to though, + # so we make a temporary parent to parse into + container = nodes.Element() + with self._renderer.current_node_context(container): + self._renderer.nested_render_text(text, lineno, inline=True) + + return container.children, [] + + def __getattr__(self, name: str): + """This method is only be called if the attribute requested has not + 
been defined. Defined attributes will not be overridden.
+        """
+        # TODO use document.reporter mechanism?
+        if hasattr(Inliner, name):
+            msg = "{cls} has not yet implemented attribute '{name}'".format(
+                cls=type(self).__name__, name=name
+            )
+            raise MockingError(msg).with_traceback(sys.exc_info()[2])
+        msg = f"{type(self).__name__} has no attribute {name}"
+        raise MockingError(msg).with_traceback(sys.exc_info()[2])
+
+
+class MockState:
+    """A mock version of `docutils.parsers.rst.states.RSTState`.
+
+    This is passed to the `Directive.run()` method,
+    so that directives may run nested parses on their content,
+    which will be parsed as Markdown rather than RST.
+    """
+
+    def __init__(
+        self,
+        renderer: DocutilsRenderer,
+        state_machine: MockStateMachine,
+        lineno: int,
+    ):
+        self._renderer = renderer
+        self._lineno = lineno
+        self.document = renderer.document
+        self.reporter = renderer.document.reporter
+        self.state_machine = state_machine
+        self.inliner = MockInliner(renderer)
+
+        class Struct:
+            document = self.document
+            reporter = self.document.reporter
+            language = renderer.language_module_rst
+            title_styles: list[str] = []
+            section_level = max(renderer._level_to_elem)
+            section_bubble_up_kludge = False
+            inliner = self.inliner
+
+        self.memo = Struct
+
+    def parse_directive_block(
+        self,
+        content: StringList,
+        line_offset: int,
+        directive: type[Directive],
+        option_presets: dict,
+    ) -> tuple[list, dict, StringList, int]:
+        """Parse the full directive text.
+
+        :returns: (arguments, options, content, content_offset)
+        """
+        if option_presets:
+            raise MockingError("parse_directive_block: option_presets not implemented")
+        # TODO should argument_str always be ""?
+        arguments, options, body_lines, content_offset = parse_directive_text(
+            directive, "", "\n".join(content)
+        )
+        return (
+            arguments,
+            options,
+            StringList(body_lines, source=content.source),
+            line_offset + content_offset,
+        )
+
+    def nested_parse(
+        self,
+        block: StringList,
+        input_offset: int,
+        node: nodes.Element,
+        match_titles: bool = False,
+        state_machine_class=None,
+        state_machine_kwargs=None,
+    ) -> None:
+        """Perform a nested parse of the input block, with ``node`` as the parent.
+
+        :param block: The block of lines to parse.
+        :param input_offset: The offset of the first line of block,
+            to the starting line of the state (i.e. directive).
+        :param node: The parent node to attach the parsed content to.
+        :param match_titles: Whether to allow the parsing of headings
+            (normally this is false,
+            since nested headings would break the document structure)
+        """
+        sm_match_titles = self.state_machine.match_titles
+        with self._renderer.current_node_context(node):
+            self._renderer.nested_render_text(
+                "\n".join(block),
+                self._lineno + input_offset,
+                allow_headings=match_titles,
+            )
+        self.state_machine.match_titles = sm_match_titles
+
+    def parse_target(self, block, block_text, lineno: int):
+        """
+        Taken from https://github.com/docutils-mirror/docutils/blob/e88c5fb08d5cdfa8b4ac1020dd6f7177778d5990/docutils/parsers/rst/states.py#L1927 # noqa: E501
+        """
+        # Commenting out this code because it only applies to rST
+        # if block and block[-1].strip()[-1:] == "_":  # possible indirect target
+        #     reference = " ".join([line.strip() for line in block])
+        #     refname = self.is_reference(reference)
+        #     if refname:
+        #         return "refname", refname
+        reference = "".join(["".join(line.split()) for line in block])
+        return "refuri", unescape(reference)
+
+    def inline_text(
+        self, text: str, lineno: int
+    ) -> tuple[list[nodes.Element], list[nodes.Element]]:
+        """Parse text with only inline rules.
+
+        :returns: (list of nodes, list of messages)
+        """
+        return self.inliner.parse(text, lineno, self.memo, self._renderer.current_node)
+
+    # U+2014 is an em-dash:
+    attribution_pattern = re.compile("^((?:---?(?!-)|\u2014) *)(.+)")
+
+    def block_quote(self, lines: list[str], line_offset: int) -> list[nodes.Element]:
+        """Parse a block quote, which is a block of text,
+        followed by an (optional) attribution.
+
+        ::
+
+           No matter where you go, there you are.
+
+           -- Buckaroo Banzai
+        """
+        elements = []
+        # split attribution
+        last_line_blank = False
+        blockquote_lines = lines
+        attribution_lines = []
+        attribution_line_offset = None
+        # First line after a blank line must begin with a dash
+        for i, line in enumerate(lines):
+            if not line.strip():
+                last_line_blank = True
+                continue
+            if not last_line_blank:
+                last_line_blank = False
+                continue
+            last_line_blank = False
+            match = self.attribution_pattern.match(line)
+            if not match:
+                continue
+            attribution_line_offset = i
+            attribution_lines = [match.group(2)]
+            for at_line in lines[i + 1 :]:
+                indented_line = at_line[len(match.group(1)) :]
+                if len(indented_line) != len(at_line.lstrip()):
+                    break
+                attribution_lines.append(indented_line)
+            blockquote_lines = lines[:i]
+            break
+        # parse block
+        blockquote = nodes.block_quote()
+        self.nested_parse(blockquote_lines, line_offset, blockquote)
+        elements.append(blockquote)
+        # parse attribution
+        if attribution_lines:
+            attribution_text = "\n".join(attribution_lines)
+            lineno = self._lineno + line_offset + (attribution_line_offset or 0)
+            textnodes, messages = self.inline_text(attribution_text, lineno)
+            attribution = nodes.attribution(attribution_text, "", *textnodes)
+            (
+                attribution.source,
+                attribution.line,
+            ) = self.state_machine.get_source_and_line(lineno)
+            blockquote += attribution
+            elements += messages
+        return elements
+
+    def build_table(self, tabledata, tableline, stub_columns: int = 0, widths=None):
+        return Body.build_table(self, tabledata, tableline, stub_columns, widths)
+
+    def build_table_row(self, rowdata, tableline):
+        return Body.build_table_row(self, rowdata, tableline)
+
+    def __getattr__(self, name: str):
+        """This method is only called if the attribute requested has not
+        been defined. Defined attributes will not be overridden.
+ """ + cls = type(self).__name__ + if hasattr(Body, name): + msg = ( + f"{cls} has not yet implemented attribute '{name}'. " + "You can parse RST directly via the `{eval-rst}` directive: " + "https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#how-directives-parse-content" # noqa: E501 + ) + else: + # The requested `name` is not a docutils Body element + # (such as "footnote", "block_quote", "paragraph", …) + msg = f"{cls} has no attribute '{name}'" + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockStateMachine: + """A mock version of `docutils.parsers.rst.states.RSTStateMachine`. + + This is parsed to the `Directives.run()` method. + """ + + def __init__(self, renderer: DocutilsRenderer, lineno: int): + self._renderer = renderer + self._lineno = lineno + self.document = renderer.document + self.language = renderer.language_module_rst + self.reporter = self.document.reporter + self.node: nodes.Element = renderer.current_node + self.match_titles: bool = True + + def get_source(self, lineno: int | None = None): + """Return document source path.""" + return self.document["source"] + + def get_source_and_line(self, lineno: int | None = None): + """Return (source path, line) tuple for current or given line number.""" + return self.document["source"], lineno or self._lineno + + def __getattr__(self, name: str): + """This method is only be called if the attribute requested has not + been defined. Defined attributes will not be overridden. + """ + if hasattr(RSTStateMachine, name): + msg = "{cls} has not yet implemented attribute '{name}'".format( + cls=type(self).__name__, name=name + ) + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + msg = f"{type(self).__name__} has no attribute {name}" + raise MockingError(msg).with_traceback(sys.exc_info()[2]) + + +class MockIncludeDirective: + """This directive uses a lot of statemachine logic that is not yet mocked. + Therefore, we treat it as a special case (at least for now). + + See: + https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment + """ + + def __init__( + self, + renderer: DocutilsRenderer, + name: str, + klass: Include, + arguments: list, + options: dict, + body: list[str], + lineno: int, + ): + self.renderer = renderer + self.document = renderer.document + self.name = name + self.klass = klass + self.arguments = arguments + self.options = options + self.body = body + self.lineno = lineno + + def run(self) -> list[nodes.Element]: + + from docutils.parsers.rst.directives.body import CodeBlock, NumberLines + + if not self.document.settings.file_insertion_enabled: + raise DirectiveError(2, f'Directive "{self.name}" disabled.') + + source_dir = Path(self.document["source"]).absolute().parent + include_arg = "".join([s.strip() for s in self.arguments[0].splitlines()]) + + if include_arg.startswith("<") and include_arg.endswith(">"): + # # docutils "standard" includes + path = Path(self.klass.standard_include_path).joinpath(include_arg[1:-1]) + else: + # if using sphinx interpret absolute paths "correctly", + # i.e. 
relative to source directory + try: + sphinx_env = self.document.settings.env + except AttributeError: + pass + else: + _, include_arg = sphinx_env.relfn2path(self.arguments[0]) + sphinx_env.note_included(include_arg) + path = Path(include_arg) + path = source_dir.joinpath(path) + # this ensures that the parent file is rebuilt if the included file changes + self.document.settings.record_dependencies.add(str(path)) + + # read file + encoding = self.options.get("encoding", self.document.settings.input_encoding) + error_handler = self.document.settings.input_encoding_error_handler + # tab_width = self.options.get("tab-width", self.document.settings.tab_width) + try: + file_content = path.read_text(encoding=encoding, errors=error_handler) + except Exception as error: + raise DirectiveError( + 4, + 'Directive "{}": error reading file: {}\n{}.'.format( + self.name, path, error + ), + ) + + # get required section of text + startline = self.options.get("start-line", None) + endline = self.options.get("end-line", None) + file_content = "\n".join(file_content.splitlines()[startline:endline]) + startline = startline or 0 + for split_on_type in ["start-after", "end-before"]: + split_on = self.options.get(split_on_type, None) + if not split_on: + continue + split_index = file_content.find(split_on) + if split_index < 0: + raise DirectiveError( + 4, + 'Directive "{}"; option "{}": text not found "{}".'.format( + self.name, split_on_type, split_on + ), + ) + if split_on_type == "start-after": + startline += split_index + len(split_on) + file_content = file_content[split_index + len(split_on) :] + else: + file_content = file_content[:split_index] + + if "literal" in self.options: + literal_block = nodes.literal_block( + file_content, source=str(path), classes=self.options.get("class", []) + ) + literal_block.line = 1 # TODO don;t think this should be 1? + self.add_name(literal_block) + if "number-lines" in self.options: + try: + startline = int(self.options["number-lines"] or 1) + except ValueError: + raise DirectiveError( + 3, ":number-lines: with non-integer " "start value" + ) + endline = startline + len(file_content.splitlines()) + if file_content.endswith("\n"): + file_content = file_content[:-1] + tokens = NumberLines([([], file_content)], startline, endline) + for classes, value in tokens: + if classes: + literal_block += nodes.inline(value, value, classes=classes) + else: + literal_block += nodes.Text(value) + else: + literal_block += nodes.Text(file_content) + return [literal_block] + if "code" in self.options: + self.options["source"] = str(path) + state_machine = MockStateMachine(self.renderer, self.lineno) + state = MockState(self.renderer, state_machine, self.lineno) + codeblock = CodeBlock( + name=self.name, + arguments=[self.options.pop("code")], + options=self.options, + content=file_content.splitlines(), + lineno=self.lineno, + content_offset=0, + block_text=file_content, + state=state, + state_machine=state_machine, + ) + return codeblock.run() + + # Here we perform a nested render, but temporarily setup the document/reporter + # with the correct document path and lineno for the included file. 
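+        # save the original state, so that it can be restored in the
+        # ``finally`` block below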
+ source = self.renderer.document["source"] + rsource = self.renderer.reporter.source + line_func = getattr(self.renderer.reporter, "get_source_and_line", None) + try: + self.renderer.document["source"] = str(path) + self.renderer.reporter.source = str(path) + self.renderer.reporter.get_source_and_line = lambda l: (str(path), l) + if "relative-images" in self.options: + self.renderer.md_env["relative-images"] = os.path.relpath( + path.parent, source_dir + ) + if "relative-docs" in self.options: + self.renderer.md_env["relative-docs"] = ( + self.options["relative-docs"], + source_dir, + path.parent, + ) + self.renderer.nested_render_text( + file_content, startline + 1, allow_headings=True + ) + finally: + self.renderer.document["source"] = source + self.renderer.reporter.source = rsource + self.renderer.md_env.pop("relative-images", None) + self.renderer.md_env.pop("relative-docs", None) + if line_func is not None: + self.renderer.reporter.get_source_and_line = line_func + else: + del self.renderer.reporter.get_source_and_line + return [] + + def add_name(self, node: nodes.Element): + """Append self.options['name'] to node['names'] if it exists. + + Also normalize the name string and register it as explicit target. + """ + if "name" in self.options: + name = nodes.fully_normalize_name(self.options.pop("name")) + if "name" in node: + del node["name"] + node["names"].append(name) + self.renderer.document.note_explicit_target(node, node) + + +class MockRSTParser(RSTParser): + """RSTParser which avoids a negative side effect.""" + + def parse(self, inputstring: str, document: nodes.document): + """Parse the input to populate the document AST.""" + from docutils.parsers.rst import roles + + should_restore = False + if "" in roles._roles: + should_restore = True + blankrole = roles._roles[""] + + super().parse(inputstring, document) + + if should_restore: + roles._roles[""] = blankrole diff --git a/myst_parser/parsers/__init__.py b/myst_parser/parsers/__init__.py new file mode 100644 index 0000000..26fbfca --- /dev/null +++ b/myst_parser/parsers/__init__.py @@ -0,0 +1 @@ +"""Parsers of MyST Markdown source text to docutils AST.""" diff --git a/myst_parser/parsers/directives.py b/myst_parser/parsers/directives.py new file mode 100644 index 0000000..5637254 --- /dev/null +++ b/myst_parser/parsers/directives.py @@ -0,0 +1,190 @@ +"""Fenced code blocks are parsed as directives, +if the block starts with ``{directive_name}``, +followed by arguments on the same line. + +Directive options are read from a YAML block, +if the first content line starts with ``---``, e.g. + +:: + + ```{directive_name} arguments + --- + option1: name + option2: | + Longer text block + --- + content... + ``` + +Or the option block will be parsed if the first content line starts with ``:``, +as a YAML block consisting of every line that starts with a ``:``, e.g. + +:: + + ```{directive_name} arguments + :option1: name + :option2: other + + content... + ``` + +If the first line of a directive's content is blank, this will be stripped +from the content. +This is to allow for separation between the option block and content. 
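+
+As an illustrative sketch (assuming the docutils ``Note`` admonition directive),
+the ``parse_directive_text`` function defined below would be used like::
+
+    from docutils.parsers.rst.directives.admonitions import Note
+
+    arguments, options, body_lines, offset = parse_directive_text(
+        Note, "", ":class: tip\n\ncontent..."
+    )
+    # -> ([], {"class": ["tip"]}, ["content..."], 2)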
+ +""" +from __future__ import annotations + +import datetime +import re +from textwrap import dedent +from typing import Any, Callable + +import yaml +from docutils.parsers.rst import Directive +from docutils.parsers.rst.directives.misc import TestDirective + + +class DirectiveParsingError(Exception): + """Raise on parsing/validation error.""" + + pass + + +def parse_directive_text( + directive_class: type[Directive], + first_line: str, + content: str, + validate_options: bool = True, +) -> tuple[list[str], dict, list[str], int]: + """Parse (and validate) the full directive text. + + :param first_line: The text on the same line as the directive name. + May be an argument or body text, dependent on the directive + :param content: All text after the first line. Can include options. + :param validate_options: Whether to validate the values of options + + :returns: (arguments, options, body_lines, content_offset) + """ + if directive_class.option_spec: + body, options = parse_directive_options( + content, directive_class, validate=validate_options + ) + body_lines = body.splitlines() + content_offset = len(content.splitlines()) - len(body_lines) + else: + # If there are no possible options, we do not look for a YAML block + options = {} + body_lines = content.splitlines() + content_offset = 0 + + if not (directive_class.required_arguments or directive_class.optional_arguments): + # If there are no possible arguments, then the body starts on the argument line + if first_line: + body_lines.insert(0, first_line) + arguments = [] + else: + arguments = parse_directive_arguments(directive_class, first_line) + + # remove first line of body if blank + # this is to allow space between the options and the content + if body_lines and not body_lines[0].strip(): + body_lines = body_lines[1:] + content_offset += 1 + + # check for body content + if body_lines and not directive_class.has_content: + raise DirectiveParsingError("No content permitted") + + return arguments, options, body_lines, content_offset + + +def parse_directive_options( + content: str, directive_class: type[Directive], validate: bool = True +): + """Parse (and validate) the directive option section.""" + options: dict[str, Any] = {} + if content.startswith("---"): + content = "\n".join(content.splitlines()[1:]) + match = re.search(r"^-{3,}", content, re.MULTILINE) + if match: + yaml_block = content[: match.start()] + content = content[match.end() + 1 :] # TODO advance line number + else: + yaml_block = content + content = "" + yaml_block = dedent(yaml_block) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise DirectiveParsingError("Invalid options YAML: " + str(error)) + elif content.lstrip().startswith(":"): + content_lines = content.splitlines() # type: list + yaml_lines = [] + while content_lines: + if not content_lines[0].lstrip().startswith(":"): + break + yaml_lines.append(content_lines.pop(0).lstrip()[1:]) + yaml_block = "\n".join(yaml_lines) + content = "\n".join(content_lines) + try: + options = yaml.safe_load(yaml_block) or {} + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error: + raise DirectiveParsingError("Invalid options YAML: " + str(error)) + if not isinstance(options, dict): + raise DirectiveParsingError(f"Invalid options (not dict): {options}") + + if (not validate) or issubclass(directive_class, TestDirective): + # technically this directive spec only accepts one option ('option') + # but since its for testing only we 
accept all options + return content, options + + # check options against spec + options_spec: dict[str, Callable] = directive_class.option_spec + for name, value in list(options.items()): + try: + convertor = options_spec[name] + except KeyError: + raise DirectiveParsingError(f"Unknown option: {name}") + if not isinstance(value, str): + if value is True or value is None: + value = None # flag converter requires no argument + elif isinstance(value, (int, float, datetime.date, datetime.datetime)): + # convertor always requires string input + value = str(value) + else: + raise DirectiveParsingError( + f'option "{name}" value not string (enclose with ""): {value}' + ) + try: + converted_value = convertor(value) + except (ValueError, TypeError) as error: + raise DirectiveParsingError( + "Invalid option value: (option: '{}'; value: {})\n{}".format( + name, value, error + ) + ) + options[name] = converted_value + + return content, options + + +def parse_directive_arguments(directive, arg_text): + """Parse (and validate) the directive argument section.""" + required = directive.required_arguments + optional = directive.optional_arguments + arguments = arg_text.split() + if len(arguments) < required: + raise DirectiveParsingError( + f"{required} argument(s) required, {len(arguments)} supplied" + ) + elif len(arguments) > required + optional: + if directive.final_argument_whitespace: + arguments = arg_text.split(None, required + optional - 1) + else: + raise DirectiveParsingError( + "maximum {} argument(s) allowed, {} supplied".format( + required + optional, len(arguments) + ) + ) + return arguments diff --git a/myst_parser/parsers/docutils_.py b/myst_parser/parsers/docutils_.py new file mode 100644 index 0000000..aaef5e2 --- /dev/null +++ b/myst_parser/parsers/docutils_.py @@ -0,0 +1,275 @@ +"""MyST Markdown parser for docutils.""" +from dataclasses import Field +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union + +from docutils import frontend, nodes +from docutils.core import default_description, publish_cmdline +from docutils.parsers.rst import Parser as RstParser +from typing_extensions import Literal, get_args, get_origin + +from myst_parser.config.main import ( + MdParserConfig, + TopmatterReadError, + merge_file_level, + read_topmatter, +) +from myst_parser.mdit_to_docutils.base import DocutilsRenderer, create_warning +from myst_parser.parsers.mdit import create_md_parser + + +def _validate_int( + setting, value, option_parser, config_parser=None, config_section=None +) -> int: + """Validate an integer setting.""" + return int(value) + + +def _create_validate_tuple(length: int) -> Callable[..., Tuple[str, ...]]: + """Create a validator for a tuple of length `length`.""" + + def _validate( + setting, value, option_parser, config_parser=None, config_section=None + ): + string_list = frontend.validate_comma_separated_list( + setting, value, option_parser, config_parser, config_section + ) + if len(string_list) != length: + raise ValueError( + f"Expecting {length} items in {setting}, got {len(string_list)}." 
+            )
+        return tuple(string_list)
+
+    return _validate
+
+
+class Unset:
+    """A sentinel class for unset settings."""
+
+    def __repr__(self):
+        return "UNSET"
+
+
+DOCUTILS_UNSET = Unset()
+"""Sentinel for arguments not set through docutils.conf."""
+
+
+DOCUTILS_EXCLUDED_ARGS = (
+    # docutils.conf can't represent callables
+    "heading_slug_func",
+    # docutils.conf can't represent dicts
+    "html_meta",
+    "substitutions",
+    # we can't add substitutions so not needed
+    "sub_delimiters",
+    # sphinx only options
+    "heading_anchors",
+    "ref_domains",
+    "update_mathjax",
+    "mathjax_classes",
+)
+"""Names of settings that cannot be set in docutils.conf."""
+
+
+def _attr_to_optparse_option(at: Field, default: Any) -> Tuple[dict, str]:
+    """Convert a field into a Docutils optparse options dict."""
+    if at.type is int:
+        return {"metavar": "<int>", "validator": _validate_int}, f"(default: {default})"
+    if at.type is bool:
+        return {
+            "metavar": "<boolean>",
+            "validator": frontend.validate_boolean,
+        }, f"(default: {default})"
+    if at.type is str:
+        return {
+            "metavar": "<str>",
+        }, f"(default: '{default}')"
+    if get_origin(at.type) is Literal and all(
+        isinstance(a, str) for a in get_args(at.type)
+    ):
+        args = get_args(at.type)
+        return {
+            "metavar": f"<{'|'.join(repr(a) for a in args)}>",
+            "type": "choice",
+            "choices": args,
+        }, f"(default: {default!r})"
+    if at.type in (Iterable[str], Sequence[str]):
+        return {
+            "metavar": "<comma-delimited>",
+            "validator": frontend.validate_comma_separated_list,
+        }, f"(default: '{','.join(default)}')"
+    if at.type == Tuple[str, str]:
+        return {
+            "metavar": "<str,str>",
+            "validator": _create_validate_tuple(2),
+        }, f"(default: '{','.join(default)}')"
+    if at.type == Union[int, type(None)]:
+        return {
+            "metavar": "<null|int>",
+            "validator": _validate_int,
+        }, f"(default: {default})"
+    if at.type == Union[Iterable[str], type(None)]:
+        default_str = ",".join(default) if default else ""
+        return {
+            "metavar": "<null|comma-delimited>",
+            "validator": frontend.validate_comma_separated_list,
+        }, f"(default: {default_str!r})"
+    raise AssertionError(
+        f"Configuration option {at.name} not set up for use in docutils.conf."
+    )
+
+
+def attr_to_optparse_option(
+    attribute: Field, default: Any, prefix: str = "myst_"
+) -> Tuple[str, List[str], Dict[str, Any]]:
+    """Convert an ``MdParserConfig`` attribute into a Docutils setting tuple.
+
+    :returns: A tuple of ``(help string, option flags, optparse kwargs)``.
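+
+    For example (illustrative): a ``bool`` field named ``gfm_only`` becomes the
+    ``--myst-gfm-only`` flag, stored under ``dest="myst_gfm_only"`` and
+    validated with ``frontend.validate_boolean``.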
+ """ + name = f"{prefix}{attribute.name}" + flag = "--" + name.replace("_", "-") + options = {"dest": name, "default": DOCUTILS_UNSET} + at_options, type_str = _attr_to_optparse_option(attribute, default) + options.update(at_options) + help_str = attribute.metadata.get("help", "") if attribute.metadata else "" + return (f"{help_str} {type_str}", [flag], options) + + +def create_myst_settings_spec( + excluded: Sequence[str], config_cls=MdParserConfig, prefix: str = "myst_" +): + """Return a list of Docutils setting for the docutils MyST section.""" + defaults = config_cls() + return tuple( + attr_to_optparse_option(at, getattr(defaults, at.name), prefix) + for at in config_cls.get_fields() + if at.name not in excluded + ) + + +def create_myst_config( + settings: frontend.Values, + excluded: Sequence[str], + config_cls=MdParserConfig, + prefix: str = "myst_", +): + """Create a configuration instance from the given settings.""" + values = {} + for attribute in config_cls.get_fields(): + if attribute.name in excluded: + continue + setting = f"{prefix}{attribute.name}" + val = getattr(settings, setting, DOCUTILS_UNSET) + if val is not DOCUTILS_UNSET: + values[attribute.name] = val + return config_cls(**values) + + +class Parser(RstParser): + """Docutils parser for Markedly Structured Text (MyST).""" + + supported: Tuple[str, ...] = ("md", "markdown", "myst") + """Aliases this parser supports.""" + + settings_spec = ( + "MyST options", + None, + create_myst_settings_spec(DOCUTILS_EXCLUDED_ARGS), + *RstParser.settings_spec, + ) + """Runtime settings specification.""" + + config_section = "myst parser" + config_section_dependencies = ("parsers",) + translate_section_name = None + + def parse(self, inputstring: str, document: nodes.document) -> None: + """Parse source text. + + :param inputstring: The source string to parse + :param document: The root docutils node to add AST elements to + """ + + self.setup_parse(inputstring, document) + + # check for exorbitantly long lines + if hasattr(document.settings, "line_length_limit"): + for i, line in enumerate(inputstring.split("\n")): + if len(line) > document.settings.line_length_limit: + error = document.reporter.error( + f"Line {i+1} exceeds the line-length-limit:" + f" {document.settings.line_length_limit}." 
+ ) + document.append(error) + return + + # create parsing configuration from the global config + try: + config = create_myst_config(document.settings, DOCUTILS_EXCLUDED_ARGS) + except Exception as exc: + error = document.reporter.error(f"Global myst configuration invalid: {exc}") + document.append(error) + config = MdParserConfig() + + # update the global config with the file-level config + try: + topmatter = read_topmatter(inputstring) + except TopmatterReadError: + pass # this will be reported during the render + else: + if topmatter: + warning = lambda wtype, msg: create_warning( # noqa: E731 + document, msg, line=1, append_to=document, subtype=wtype + ) + config = merge_file_level(config, topmatter, warning) + + # parse content + parser = create_md_parser(config, DocutilsRenderer) + parser.options["document"] = document + parser.render(inputstring) + + # post-processing + + # replace raw nodes if raw is not allowed + if not getattr(document.settings, "raw_enabled", True): + for node in document.traverse(nodes.raw): + warning = document.reporter.warning("Raw content disabled.") + node.parent.replace(node, warning) + + self.finish_parse() + + +def _run_cli(writer_name: str, writer_description: str, argv: Optional[List[str]]): + """Run the command line interface for a particular writer.""" + publish_cmdline( + parser=Parser(), + writer_name=writer_name, + description=( + f"Generates {writer_description} from standalone MyST sources.\n{default_description}" + ), + argv=argv, + ) + + +def cli_html(argv: Optional[List[str]] = None) -> None: + """Cmdline entrypoint for converting MyST to HTML.""" + _run_cli("html", "(X)HTML documents", argv) + + +def cli_html5(argv: Optional[List[str]] = None): + """Cmdline entrypoint for converting MyST to HTML5.""" + _run_cli("html5", "HTML5 documents", argv) + + +def cli_latex(argv: Optional[List[str]] = None): + """Cmdline entrypoint for converting MyST to LaTeX.""" + _run_cli("latex", "LaTeX documents", argv) + + +def cli_xml(argv: Optional[List[str]] = None): + """Cmdline entrypoint for converting MyST to XML.""" + _run_cli("xml", "Docutils-native XML", argv) + + +def cli_pseudoxml(argv: Optional[List[str]] = None): + """Cmdline entrypoint for converting MyST to pseudo-XML.""" + _run_cli("pseudoxml", "pseudo-XML", argv) diff --git a/myst_parser/parsers/mdit.py b/myst_parser/parsers/mdit.py new file mode 100644 index 0000000..8476495 --- /dev/null +++ b/myst_parser/parsers/mdit.py @@ -0,0 +1,123 @@ +"""This module holds the ``create_md_parser`` function, +which creates a parser from the config. 
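+
+A minimal usage sketch (illustrative only; tokenizing does not require a
+docutils document, but rendering via ``parser.render`` does, as set up in
+the ``Parser.parse`` methods of this package)::
+
+    from myst_parser.config.main import MdParserConfig
+    from myst_parser.mdit_to_docutils.base import DocutilsRenderer
+
+    parser = create_md_parser(MdParserConfig(), DocutilsRenderer)
+    tokens = parser.parse("# A *title*")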
+""" +from __future__ import annotations + +from typing import Callable + +from markdown_it import MarkdownIt +from markdown_it.renderer import RendererProtocol +from mdit_py_plugins.amsmath import amsmath_plugin +from mdit_py_plugins.anchors import anchors_plugin +from mdit_py_plugins.attrs import attrs_plugin +from mdit_py_plugins.colon_fence import colon_fence_plugin +from mdit_py_plugins.deflist import deflist_plugin +from mdit_py_plugins.dollarmath import dollarmath_plugin +from mdit_py_plugins.field_list import fieldlist_plugin +from mdit_py_plugins.footnote import footnote_plugin +from mdit_py_plugins.front_matter import front_matter_plugin +from mdit_py_plugins.myst_blocks import myst_block_plugin +from mdit_py_plugins.myst_role import myst_role_plugin +from mdit_py_plugins.substitution import substitution_plugin +from mdit_py_plugins.tasklists import tasklists_plugin +from mdit_py_plugins.wordcount import wordcount_plugin + +from myst_parser.config.main import MdParserConfig + + +def create_md_parser( + config: MdParserConfig, renderer: Callable[[MarkdownIt], RendererProtocol] +) -> MarkdownIt: + """Return a Markdown parser with the required MyST configuration.""" + + # TODO warn if linkify required and linkify-it-py not installed + # (currently the parse will unceremoniously except) + + if config.commonmark_only: + # see https://spec.commonmark.org/ + md = MarkdownIt("commonmark", renderer_cls=renderer).use( + wordcount_plugin, per_minute=config.words_per_minute + ) + md.options.update({"myst_config": config}) + return md + + if config.gfm_only: + # see https://github.github.com/gfm/ + md = ( + MarkdownIt("commonmark", renderer_cls=renderer) + # note, strikethrough currently only supported tentatively for HTML + .enable("strikethrough") + .enable("table") + .use(tasklists_plugin) + .enable("linkify") + .use(wordcount_plugin, per_minute=config.words_per_minute) + ) + md.options.update({"linkify": True, "myst_config": config}) + return md + + md = ( + MarkdownIt("commonmark", renderer_cls=renderer) + .enable("table") + .use(front_matter_plugin) + .use(myst_block_plugin) + .use(myst_role_plugin) + .use(footnote_plugin) + .use(wordcount_plugin, per_minute=config.words_per_minute) + .disable("footnote_inline") + # disable this for now, because it need a new implementation in the renderer + .disable("footnote_tail") + ) + + typographer = False + if "smartquotes" in config.enable_extensions: + md.enable("smartquotes") + typographer = True + if "replacements" in config.enable_extensions: + md.enable("replacements") + typographer = True + if "linkify" in config.enable_extensions: + md.enable("linkify") + if md.linkify is not None: + md.linkify.set({"fuzzy_link": config.linkify_fuzzy_links}) + if "strikethrough" in config.enable_extensions: + md.enable("strikethrough") + if "dollarmath" in config.enable_extensions: + md.use( + dollarmath_plugin, + allow_labels=config.dmath_allow_labels, + allow_space=config.dmath_allow_space, + allow_digits=config.dmath_allow_digits, + double_inline=config.dmath_double_inline, + ) + if "colon_fence" in config.enable_extensions: + md.use(colon_fence_plugin) + if "amsmath" in config.enable_extensions: + md.use(amsmath_plugin) + if "deflist" in config.enable_extensions: + md.use(deflist_plugin) + if "fieldlist" in config.enable_extensions: + md.use(fieldlist_plugin) + if "tasklist" in config.enable_extensions: + md.use(tasklists_plugin) + if "substitution" in config.enable_extensions: + md.use(substitution_plugin, *config.sub_delimiters) + if "attrs_image" in 
config.enable_extensions: + md.use(attrs_plugin, after=("image",)) + if config.heading_anchors is not None: + md.use( + anchors_plugin, + max_level=config.heading_anchors, + slug_func=config.heading_slug_func, + ) + for name in config.disable_syntax: + md.disable(name, True) + + md.options.update( + { + "typographer": typographer, + "linkify": "linkify" in config.enable_extensions, + "myst_config": config, + } + ) + + return md diff --git a/myst_parser/parsers/parse_html.py b/myst_parser/parsers/parse_html.py new file mode 100644 index 0000000..7539e42 --- /dev/null +++ b/myst_parser/parsers/parse_html.py @@ -0,0 +1,440 @@ +"""A simple but complete HTML to Abstract Syntax Tree (AST) parser. + +The AST can also reproduce the HTML text. + +Example:: + + >> text = '
<div class="note"><p>text</p></div>
' + >> ast = tokenize_html(text) + >> list(ast.walk(include_self=True)) + [Root(''), Tag('div', {'class': 'note'}), Tag('p'), Data('text')] + >> str(ast) + '
<div class="note"><p>text</p></div>
' + >> str(ast[0][0]) + '
<p>text</p>
' + +Note: optional tags are not accounted for +(see https://html.spec.whatwg.org/multipage/syntax.html#optional-tags) + +""" +from __future__ import annotations + +import inspect +import itertools +from collections import abc, deque +from html.parser import HTMLParser +from typing import Any, Callable, Iterable, Iterator + + +class Attribute(dict): + """This class holds the tags's attributes.""" + + def __getitem__(self, key: str) -> str: + """If self doesn't have the key it returns ''.""" + return self.get(key, "") + + @property + def classes(self) -> list[str]: + """Return 'class' attribute as list.""" + return self["class"].split() + + def __str__(self) -> str: + """Return a htmlized representation for attributes.""" + return " ".join(f'{key}="{value}"' for key, value in self.items()) + + +class Element(abc.MutableSequence): + """An Element of the xml/html document. + + All xml/html entities inherit from this class. + """ + + def __init__(self, name: str = "", attr: dict | None = None) -> None: + """Initialise the element.""" + self.name = name + self.attrs: Attribute = Attribute(attr or {}) + self._parent: Element | None = None + self._children: list[Element] = [] + + @property + def parent(self) -> Element | None: + """Return parent.""" + return self._parent + + @property + def children(self) -> list[Element]: + """Return copy of children.""" + return self._children[:] + + def reset_children(self, children: list[Element], deepcopy: bool = False): + new_children = [] + for i, item in enumerate(children): + assert isinstance(item, Element) + if deepcopy: + item = item.deepcopy() + if item._parent is None: + item._parent = self + elif item._parent != self: + raise AssertionError(f"different parent already set for item {i}") + new_children.append(item) + self._children = new_children + + def __getitem__(self, index: int) -> Element: # type: ignore[override] + return self._children[index] + + def __setitem__(self, index: int, item: Element): # type: ignore[override] + assert isinstance(item, Element) + if item._parent is not None and item._parent != self: + raise AssertionError(f"different parent already set for: {item!r}") + item._parent = self + return self._children.__setitem__(index, item) + + def __delitem__(self, index: int): # type: ignore[override] + return self._children.__delitem__(index) + + def __len__(self) -> int: + return self._children.__len__() + + def __iter__(self) -> Iterator[Element]: + yield from self._children + + def insert(self, index: int, item: Element): + assert isinstance(item, Element) + if item._parent is not None and item._parent != self: + raise AssertionError(f"different parent already set for: {item!r}") + item._parent = self + return self._children.insert(index, item) + + def deepcopy(self) -> Element: + """Recursively copy and remove parent.""" + _copy = self.__class__(self.name, self.attrs) + for child in self: + _copy_child = child.deepcopy() + _copy.append(_copy_child) + return _copy + + def __repr__(self) -> str: + text = f"{self.__class__.__name__}({self.name!r}" + if self.attrs: + text += f", {self.attrs!r}" + text += ")" + return text + + def render( + self, + tag_overrides: dict[str, Callable[[Element, dict], str]] | None = None, + **kwargs, + ) -> str: + """Returns a HTML string representation of the element. 
+ + :param tag_overrides: Provide a dictionary of render function + for specific tag names, to override the normal render format + + """ + raise NotImplementedError + + def __str__(self) -> str: + return self.render() + + def __eq__(self, item: Any) -> bool: + return item is self + + def walk(self, include_self: bool = False) -> Iterator[Element]: + """Walk through the xml/html AST.""" + if include_self: + yield self + for child in self: + yield child + yield from child.walk() + + def strip(self, inplace: bool = False, recurse: bool = False) -> Element: + """Return copy with all `Data` tokens + that only contain whitespace / newlines removed. + """ + element = self + if not inplace: + element = self.deepcopy() + element.reset_children( + [ + e + for e in element.children + if not (isinstance(e, Data) and e.data.strip() == "") + ] + ) + if recurse: + for child in element: + child.strip(inplace=True, recurse=True) + return element + + def find( + self, + identifier: str | type[Element], + attrs: dict | None = None, + classes: Iterable[str] | None = None, + include_self: bool = False, + recurse: bool = True, + ) -> Iterator[Element]: + """Find all elements that match name and specific attributes.""" + iterator = self.walk() if recurse else self + if include_self: + iterator = itertools.chain([self], iterator) + if inspect.isclass(identifier): + test_func = lambda c: isinstance(c, identifier) # noqa: E731 + else: + test_func = lambda c: c.name == identifier # noqa: E731 + classes = set(classes) if classes is not None else classes + for child in iterator: + if test_func(child): + if classes is not None and not classes.issubset(child.attrs.classes): + continue + for key, value in (attrs or {}).items(): + if child.attrs[key] != value: + break + else: + yield child + + +class Root(Element): + """The root of the AST tree.""" + + def render(self, **kwargs) -> str: # type: ignore[override] + """Returns a string HTML representation of the structure.""" + return "".join(child.render(**kwargs) for child in self) + + +class Tag(Element): + """Represent xml/html tags under the form: ... .""" + + def render( + self, + tag_overrides: dict[str, Callable[[Element, dict], str]] | None = None, + **kwargs, + ) -> str: + if tag_overrides and self.name in tag_overrides: + return tag_overrides[self.name](self, tag_overrides) + return ( + f"<{self.name}{' ' if self.attrs else ''}{self.attrs}>" + + "".join( + child.render(tag_overrides=tag_overrides, **kwargs) for child in self + ) + + f"" + ) + + +class XTag(Element): + """Represent XHTML style tags with no children, like ``""" + + def render( + self, + tag_overrides: dict[str, Callable[[Element, dict], str]] | None = None, + **kwargs, + ) -> str: + if tag_overrides is not None and self.name in tag_overrides: + return tag_overrides[self.name](self, tag_overrides) + return f"<{self.name}{' ' if self.attrs else ''}{self.attrs}/>" + + +class VoidTag(Element): + """Represent tags with no children, only start tag, like ``""" + + def render(self, **kwargs) -> str: # type: ignore[override] + return f"<{self.name}{' ' if self.attrs else ''}{self.attrs}>" + + +class TerminalElement(Element): + def __init__(self, data: str): + super().__init__("") + self.data: str = data + + def __repr__(self) -> str: + text = self.data + if len(text) > 20: + text = text[:17] + "..." 
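+        # e.g. Data("some very long text content") -> Data('some very long te...')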
+ return f"{self.__class__.__name__}({text!r})" + + def deepcopy(self) -> TerminalElement: + """Copy and remove parent.""" + _copy = self.__class__(self.data) + return _copy + + +class Data(TerminalElement): + """Represent data inside xml/html documents, like raw text.""" + + def render(self, **kwargs) -> str: # type: ignore[override] + return self.data + + +class Declaration(TerminalElement): + """Represent declarations, like ``""" + + def render(self, **kwargs) -> str: # type: ignore[override] + return f"" + + +class Comment(TerminalElement): + """Represent HTML comments""" + + def render(self, **kwargs) -> str: # type: ignore[override] + return f"" + + +class Pi(TerminalElement): + """Represent processing instructions like ``""" + + def render(self, **kwargs) -> str: # type: ignore[override] + return f"" + + +class Char(TerminalElement): + """Represent character codes like: `�`""" + + def render(self, **kwargs) -> str: # type: ignore[override] + return f"&#{self.data};" + + +class Entity(TerminalElement): + """Represent entities like `&`""" + + def render(self, **kwargs) -> str: # type: ignore[override] + return f"&{self.data};" + + +class Tree: + """The engine class to generate the AST tree.""" + + def __init__(self, name: str = ""): + """Initialise Tree""" + self.name = name + self.outmost = Root(name) + self.stack: deque = deque() + self.stack.append(self.outmost) + + def clear(self): + """Clear the outmost and stack for a new parsing.""" + self.outmost = Root(self.name) + self.stack.clear() + self.stack.append(self.outmost) + + def last(self) -> Element: + """Return the last pointer which point to the actual tag scope.""" + return self.stack[-1] + + def nest_tag(self, name: str, attrs: dict): + """Nest a given tag at the bottom of the tree using + the last stack's pointer. + """ + pointer = self.stack.pop() + item = Tag(name, attrs) + pointer.append(item) + self.stack.append(pointer) + self.stack.append(item) + + def nest_xtag(self, name: str, attrs: dict): + """Nest an XTag onto the tree.""" + top = self.last() + item = XTag(name, attrs) + top.append(item) + + def nest_vtag(self, name: str, attrs: dict): + """Nest a VoidTag onto the tree.""" + top = self.last() + item = VoidTag(name, attrs) + top.append(item) + + def nest_terminal(self, klass: type[TerminalElement], data: str): + """Nest the data onto the tree.""" + top = self.last() + item = klass(data) + top.append(item) + + def enclose(self, name: str): + """When a closing tag is found, pop the pointer's scope from the stack, + to then point to the earlier scope's tag. + """ + count = 0 + for ind in reversed(self.stack): + count = count + 1 + if ind.name == name: + break + else: + count = 0 + + # It pops all the items which do not match with the closing tag. 
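+        # Illustrative walk-through: for '<div><p>text</div>', when the closing
+        # div is handled the stack is [Root, Tag('div'), Tag('p')]; 'div' is
+        # found 2 items from the top, so Tag('p') and Tag('div') are both
+        # popped, implicitly closing the unclosed <p>.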
+ for _ in range(0, count): + self.stack.pop() + + +class HtmlToAst(HTMLParser): + """The tokenizer class.""" + + # see https://html.spec.whatwg.org/multipage/syntax.html#void-elements + void_elements = { + "area", + "base", + "br", + "col", + "embed", + "hr", + "img", + "input", + "link", + "meta", + "param", + "source", + "track", + "wbr", + } + + def __init__(self, name: str = "", convert_charrefs: bool = False): + super().__init__(convert_charrefs=convert_charrefs) + self.struct = Tree(name) + + def feed(self, source: str) -> Root: # type: ignore[override] + """Parse the source string.""" + self.struct.clear() + super().feed(source) + return self.struct.outmost + + def handle_starttag(self, name: str, attr): + """When found an opening tag then nest it onto the tree.""" + if name in self.void_elements: + self.struct.nest_vtag(name, attr) + else: + self.struct.nest_tag(name, attr) + + def handle_startendtag(self, name: str, attr): + """When found a XHTML tag style then nest it up to the tree.""" + self.struct.nest_xtag(name, attr) + + def handle_endtag(self, name: str): + """When found a closing tag then makes it point to the right scope.""" + if name not in self.void_elements: + self.struct.enclose(name) + + def handle_data(self, data: str): + """Nest data onto the tree.""" + self.struct.nest_terminal(Data, data) + + def handle_decl(self, decl: str): + self.struct.nest_terminal(Declaration, decl) + + def unknown_decl(self, decl: str): + self.struct.nest_terminal(Declaration, decl) + + def handle_charref(self, data: str): + self.struct.nest_terminal(Char, data) + + def handle_entityref(self, data: str): + self.struct.nest_terminal(Entity, data) + + def handle_pi(self, data: str): + self.struct.nest_terminal(Pi, data) + + def handle_comment(self, data: str): + self.struct.nest_terminal(Comment, data) + + +def tokenize_html(text: str, name: str = "", convert_charrefs: bool = False) -> Root: + parser = HtmlToAst(name, convert_charrefs=convert_charrefs) + return parser.feed(text) diff --git a/myst_parser/parsers/sphinx_.py b/myst_parser/parsers/sphinx_.py new file mode 100644 index 0000000..fff098f --- /dev/null +++ b/myst_parser/parsers/sphinx_.py @@ -0,0 +1,69 @@ +"""MyST Markdown parser for sphinx.""" +from __future__ import annotations + +from docutils import nodes +from docutils.parsers.rst import Parser as RstParser +from sphinx.parsers import Parser as SphinxParser +from sphinx.util import logging + +from myst_parser.config.main import ( + MdParserConfig, + TopmatterReadError, + merge_file_level, + read_topmatter, +) +from myst_parser.mdit_to_docutils.sphinx_ import SphinxRenderer, create_warning +from myst_parser.parsers.mdit import create_md_parser + +SPHINX_LOGGER = logging.getLogger(__name__) + + +class MystParser(SphinxParser): + """Sphinx parser for Markedly Structured Text (MyST).""" + + supported: tuple[str, ...] = ("md", "markdown", "myst") + """Aliases this parser supports.""" + + settings_spec = RstParser.settings_spec + """Runtime settings specification. + + Defines runtime settings and associated command-line options, as used by + `docutils.frontend.OptionParser`. This is a concatenation of tuples of: + + - Option group title (string or `None` which implies no group, just a list + of single options). + + - Description (string or `None`). 
+ + - A sequence of option tuples + """ + + config_section = "myst parser" + config_section_dependencies = ("parsers",) + translate_section_name = None + + def parse(self, inputstring: str, document: nodes.document) -> None: + """Parse source text. + + :param inputstring: The source string to parse + :param document: The root docutils node to add AST elements to + + """ + # get the global config + config: MdParserConfig = document.settings.env.myst_config + + # update the global config with the file-level config + try: + topmatter = read_topmatter(inputstring) + except TopmatterReadError: + pass # this will be reported during the render + else: + if topmatter: + warning = lambda wtype, msg: create_warning( # noqa: E731 + document, msg, line=1, append_to=document, subtype=wtype + ) + config = merge_file_level(config, topmatter, warning) + + parser = create_md_parser(config, SphinxRenderer) + parser.options["document"] = document + parser.render(inputstring) diff --git a/myst_parser/py.typed b/myst_parser/py.typed new file mode 100644 index 0000000..7632ecf --- /dev/null +++ b/myst_parser/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561 diff --git a/myst_parser/sphinx_.py b/myst_parser/sphinx_.py new file mode 100644 index 0000000..b085086 --- /dev/null +++ b/myst_parser/sphinx_.py @@ -0,0 +1,6 @@ +"""A module for compatibility with the docutils>=0.17 `include` directive, in RST documents:: + + .. include:: path/to/file.md + :parser: myst_parser.sphinx_ +""" +from myst_parser.parsers.sphinx_ import MystParser as Parser # noqa: F401 diff --git a/myst_parser/sphinx_ext/__init__.py b/myst_parser/sphinx_ext/__init__.py new file mode 100644 index 0000000..1bfeb71 --- /dev/null +++ b/myst_parser/sphinx_ext/__init__.py @@ -0,0 +1 @@ +"""Sphinx extension for myst_parser.""" diff --git a/myst_parser/sphinx_ext/directives.py b/myst_parser/sphinx_ext/directives.py new file mode 100644 index 0000000..39ca2c6 --- /dev/null +++ b/myst_parser/sphinx_ext/directives.py @@ -0,0 +1,136 @@ +"""MyST specific directives""" +from copy import copy +from typing import List, Tuple, cast + +from docutils import nodes +from docutils.parsers.rst import directives +from sphinx.directives import SphinxDirective +from sphinx.util.docutils import SphinxRole + +from myst_parser.mocking import MockState + + +def align(argument): + return directives.choice(argument, ("left", "center", "right")) + + +def figwidth_value(argument): + if argument.lower() == "image": + return "image" + else: + return directives.length_or_percentage_or_unitless(argument, "px") + + +class SubstitutionReferenceRole(SphinxRole): + """Implement substitution references as a role. + + Note, in ``docutils/parsers/rst/roles.py`` this is left unimplemented. + """ + + def run(self) -> Tuple[List[nodes.Node], List[nodes.system_message]]: + subref_node = nodes.substitution_reference(self.rawtext, self.text) + self.set_source_info(subref_node, self.lineno) + subref_node["refname"] = nodes.fully_normalize_name(self.text) + return [subref_node], [] + + +class FigureMarkdown(SphinxDirective): + """Directive for creating a figure with Markdown compatible syntax. 
+ + Example:: + + :::{figure-md} target + fishy + + This is a caption in **Markdown** + ::: + + """ + + required_arguments = 0 + optional_arguments = 1 # image target + final_argument_whitespace = True + has_content = True + + option_spec = { + "width": figwidth_value, + "class": directives.class_option, + "align": align, + "name": directives.unchanged, + } + + def run(self) -> List[nodes.Node]: + figwidth = self.options.pop("width", None) + figclasses = self.options.pop("class", None) + align = self.options.pop("align", None) + + if not isinstance(self.state, MockState): + return [self.figure_error("Directive is only supported in myst parser")] + state = cast(MockState, self.state) + + # ensure html image enabled + myst_extensions = copy(state._renderer.md_config.enable_extensions) + node = nodes.Element() + try: + state._renderer.md_config.enable_extensions = list( + state._renderer.md_config.enable_extensions + ) + ["html_image"] + state.nested_parse(self.content, self.content_offset, node) + finally: + state._renderer.md_config.enable_extensions = myst_extensions + + if not len(node.children) == 2: + return [ + self.figure_error( + "content should be one image, " + "followed by a single paragraph caption" + ) + ] + + image_node, caption_para = node.children + if isinstance(image_node, nodes.paragraph): + image_node = image_node[0] + + if not isinstance(image_node, nodes.image): + return [ + self.figure_error( + "content should be one image (not found), " + "followed by single paragraph caption" + ) + ] + + if not isinstance(caption_para, nodes.paragraph): + return [ + self.figure_error( + "content should be one image, " + "followed by single paragraph caption (not found)" + ) + ] + + caption_node = nodes.caption(caption_para.rawsource, "", *caption_para.children) + caption_node.source = caption_para.source + caption_node.line = caption_para.line + + figure_node = nodes.figure("", image_node, caption_node) + self.set_source_info(figure_node) + + if figwidth is not None: + figure_node["width"] = figwidth + if figclasses: + figure_node["classes"] += figclasses + if align: + figure_node["align"] = align + if self.arguments: + self.options["name"] = self.arguments[0] + self.add_name(figure_node) + + return [figure_node] + + def figure_error(self, message): + """A warning for reporting an invalid figure.""" + error = self.state_machine.reporter.error( + message, + nodes.literal_block(self.block_text, self.block_text), + line=self.lineno, + ) + return error diff --git a/myst_parser/sphinx_ext/main.py b/myst_parser/sphinx_ext/main.py new file mode 100644 index 0000000..f5aeffc --- /dev/null +++ b/myst_parser/sphinx_ext/main.py @@ -0,0 +1,60 @@ +"""The setup for the sphinx extension.""" +from typing import Any + +from sphinx.application import Sphinx + + +def setup_sphinx(app: Sphinx, load_parser=False): + """Initialize all settings and transforms in Sphinx.""" + # we do this separately to setup, + # so that it can be called by external packages like myst_nb + from myst_parser.config.main import MdParserConfig + from myst_parser.parsers.sphinx_ import MystParser + from myst_parser.sphinx_ext.directives import ( + FigureMarkdown, + SubstitutionReferenceRole, + ) + from myst_parser.sphinx_ext.mathjax import override_mathjax + from myst_parser.sphinx_ext.myst_refs import MystReferenceResolver + + if load_parser: + app.add_source_suffix(".md", "markdown") + app.add_source_parser(MystParser) + + app.add_role("sub-ref", SubstitutionReferenceRole()) + app.add_directive("figure-md", FigureMarkdown) + + 
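+    # post-transform that resolves MyST-style references such as [text](target)
+    # (see myst_parser/sphinx_ext/myst_refs.py)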
app.add_post_transform(MystReferenceResolver) + + for name, default, field in MdParserConfig().as_triple(): + if not field.metadata.get("docutils_only", False): + # TODO add types? + app.add_config_value(f"myst_{name}", default, "env", types=Any) + + app.connect("builder-inited", create_myst_config) + app.connect("builder-inited", override_mathjax) + + +def create_myst_config(app): + from sphinx.util import logging + + # Ignore type checkers because the attribute is dynamically assigned + from sphinx.util.console import bold # type: ignore[attr-defined] + + from myst_parser import __version__ + from myst_parser.config.main import MdParserConfig + + logger = logging.getLogger(__name__) + + values = { + name: app.config[f"myst_{name}"] + for name, _, field in MdParserConfig().as_triple() + if not field.metadata.get("docutils_only", False) + } + + try: + app.env.myst_config = MdParserConfig(**values) + logger.info(bold("myst v%s:") + " %s", __version__, app.env.myst_config) + except (TypeError, ValueError) as error: + logger.error("myst configuration invalid: %s", error.args[0]) + app.env.myst_config = MdParserConfig() diff --git a/myst_parser/sphinx_ext/mathjax.py b/myst_parser/sphinx_ext/mathjax.py new file mode 100644 index 0000000..260f008 --- /dev/null +++ b/myst_parser/sphinx_ext/mathjax.py @@ -0,0 +1,118 @@ +"""Overrides to ``sphinx.ext.mathjax`` + +This fixes two issues: + +1. Mathjax should not search for ``$`` delimiters, nor LaTeX amsmath environments, + since we already achieve this with the dollarmath and amsmath mrakdown-it-py plugins +2. amsmath math blocks should be wrapped in mathjax delimiters (default ``\\[...\\]``), + and assigned an equation number + +""" +from docutils import nodes +from sphinx.application import Sphinx +from sphinx.ext import mathjax +from sphinx.locale import _ +from sphinx.util import logging +from sphinx.util.math import get_node_equation_number +from sphinx.writers.html import HTMLTranslator + +logger = logging.getLogger(__name__) + + +def log_override_warning(app: Sphinx, version: int, current: str, new: str) -> None: + """Log a warning if MathJax configuration being overridden.""" + if logging.is_suppressed_warning("myst", "mathjax", app.config.suppress_warnings): + return + config_name = ( + "mathjax3_config['options']['processHtmlClass']" + if version == 3 + else "mathjax_config['tex2jax']['processClass']" + ) + logger.warning( + f"`{config_name}` is being overridden by myst-parser: '{current}' -> '{new}'. " + "Set `suppress_warnings=['myst.mathjax']` to ignore this warning, or " + "`myst_update_mathjax=False` if this is undesirable." + ) + + +def override_mathjax(app: Sphinx): + """Override aspects of the mathjax extension. + + MyST-Parser parses dollar and latex math, via markdown-it plugins. + Therefore, we tell Mathjax to only render these HTML elements. + This is accompanied by setting the `ignoreClass` on the top-level section of each MyST document. 
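+
+    For instance (an illustrative sketch, assuming the default
+    ``myst_mathjax_classes`` value of ``tex2jax_process|mathjax_process|math``),
+    the sphinx 4 / mathjax 3 branch below results in::
+
+        mathjax3_config = {
+            "options": {"processHtmlClass": "tex2jax_process|mathjax_process|math"}
+        }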
+ """ + if ( + "amsmath" in app.config["myst_enable_extensions"] + and "mathjax" in app.registry.html_block_math_renderers + ): + app.registry.html_block_math_renderers["mathjax"] = ( + html_visit_displaymath, # type: ignore[assignment] + None, + ) + + if "dollarmath" not in app.config["myst_enable_extensions"]: + return + if not app.env.myst_config.update_mathjax: # type: ignore + return + + mjax_classes = app.env.myst_config.mathjax_classes # type: ignore + + if "mathjax3_config" in app.config: + # sphinx 4 + mathjax 3 + app.config.mathjax3_config = app.config.mathjax3_config or {} # type: ignore + app.config.mathjax3_config.setdefault("options", {}) + if ( + "processHtmlClass" in app.config.mathjax3_config["options"] + and app.config.mathjax3_config["options"]["processHtmlClass"] + != mjax_classes + ): + log_override_warning( + app, + 3, + app.config.mathjax3_config["options"]["processHtmlClass"], + mjax_classes, + ) + app.config.mathjax3_config["options"]["processHtmlClass"] = mjax_classes + elif "mathjax_config" in app.config: + # sphinx 3 + mathjax 2 + app.config.mathjax_config = app.config.mathjax_config or {} # type: ignore[attr-defined] + app.config.mathjax_config.setdefault("tex2jax", {}) + if ( + "processClass" in app.config.mathjax_config["tex2jax"] + and app.config.mathjax_config["tex2jax"]["processClass"] != mjax_classes + ): + log_override_warning( + app, + 2, + app.config.mathjax_config["tex2jax"]["processClass"], + mjax_classes, + ) + app.config.mathjax_config["tex2jax"]["processClass"] = mjax_classes + + +def html_visit_displaymath(self: HTMLTranslator, node: nodes.math_block) -> None: + """Override for sphinx.ext.mathjax.html_visit_displaymath to handle amsmath. + + By default displaymath, are normally wrapped in a prefix/suffix, + defined by mathjax_display, and labelled nodes are numbered. + However, this is not the case if the math_block is set as 'nowrap', as for amsmath. + Therefore, we need to override this behaviour. + """ + if "amsmath" in node.get("classes", []): + self.body.append( + self.starttag(node, "div", CLASS="math notranslate nohighlight amsmath") + ) + if node["number"]: + number = get_node_equation_number(self, node) + self.body.append('(%s)' % number) + self.add_permalink_ref(node, _("Permalink to this equation")) + self.body.append("") + prefix, suffix = self.builder.config.mathjax_display + self.body.append(prefix) + self.body.append(self.encode(node.astext())) + self.body.append(suffix) + self.body.append("\n") + raise nodes.SkipNode + + return mathjax.html_visit_displaymath(self, node) diff --git a/myst_parser/sphinx_ext/myst_refs.py b/myst_parser/sphinx_ext/myst_refs.py new file mode 100644 index 0000000..f364345 --- /dev/null +++ b/myst_parser/sphinx_ext/myst_refs.py @@ -0,0 +1,282 @@ +"""A post-transform for overriding the behaviour of sphinx reference resolution. 
+ +This is applied to MyST type references only, such as ``[text](target)``, +and allows for nested syntax +""" +import os +from typing import Any, List, Optional, Tuple, cast + +from docutils import nodes +from docutils.nodes import Element, document +from sphinx import addnodes, version_info +from sphinx.addnodes import pending_xref +from sphinx.domains.std import StandardDomain +from sphinx.locale import __ +from sphinx.transforms.post_transforms import ReferencesResolver +from sphinx.util import docname_join, logging +from sphinx.util.nodes import clean_astext, make_refnode + +from myst_parser._compat import findall + +try: + from sphinx.errors import NoUri +except ImportError: + # sphinx < 2.1 + from sphinx.environment import NoUri # type: ignore + +logger = logging.getLogger(__name__) + + +class MystReferenceResolver(ReferencesResolver): + """Resolves cross-references on doctrees. + + Overrides default sphinx implementation, to allow for nested syntax + """ + + default_priority = 9 # higher priority than ReferencesResolver (10) + + def run(self, **kwargs: Any) -> None: + self.document: document + for node in findall(self.document)(addnodes.pending_xref): + if node["reftype"] != "myst": + continue + + contnode = cast(nodes.TextElement, node[0].deepcopy()) + newnode = None + + target = node["reftarget"] + refdoc = node.get("refdoc", self.env.docname) + domain = None + + try: + newnode = self.resolve_myst_ref(refdoc, node, contnode) + if newnode is None: + # no new node found? try the missing-reference event + # but first we change the the reftype to 'any' + # this means it is picked up by extensions like intersphinx + node["reftype"] = "any" + try: + newnode = self.app.emit_firstresult( + "missing-reference", + self.env, + node, + contnode, + **( + {"allowed_exceptions": (NoUri,)} + if version_info[0] > 2 + else {} + ), + ) + finally: + node["reftype"] = "myst" + # still not found? warn if node wishes to be warned about or + # we are in nit-picky mode + if newnode is None: + node["refdomain"] = "" + # TODO ideally we would override the warning message here, + # to show the [ref.myst] for suppressing warning + self.warn_missing_reference( + refdoc, node["reftype"], target, node, domain + ) + except NoUri: + newnode = contnode + + node.replace_self(newnode or contnode) + + def resolve_myst_ref( + self, refdoc: str, node: pending_xref, contnode: Element + ) -> Element: + """Resolve reference generated by the "myst" role; ``[text](reference)``. 
+ + This builds on the sphinx ``any`` role to also resolve: + + - Document references with extensions; ``[text](./doc.md)`` + - Document references with anchors with anchors; ``[text](./doc.md#target)`` + - Nested syntax for explicit text with std:doc and std:ref; + ``[**nested**](reference)`` + + """ + target = node["reftarget"] # type: str + results = [] # type: List[Tuple[str, Element]] + + res_anchor = self._resolve_anchor(node, refdoc) + if res_anchor: + results.append(("std:doc", res_anchor)) + else: + # if we've already found an anchored doc, + # don't search in the std:ref/std:doc (leads to duplication) + + # resolve standard references + res = self._resolve_ref_nested(node, refdoc) + if res: + results.append(("std:ref", res)) + + # resolve doc names + res = self._resolve_doc_nested(node, refdoc) + if res: + results.append(("std:doc", res)) + + # get allowed domains for referencing + ref_domains = self.env.config.myst_ref_domains + + assert self.app.builder + + # next resolve for any other standard reference objects + if ref_domains is None or "std" in ref_domains: + stddomain = cast(StandardDomain, self.env.get_domain("std")) + for objtype in stddomain.object_types: + key = (objtype, target) + if objtype == "term": + key = (objtype, target.lower()) + if key in stddomain.objects: + docname, labelid = stddomain.objects[key] + domain_role = "std:" + stddomain.role_for_objtype(objtype) + ref_node = make_refnode( + self.app.builder, refdoc, docname, labelid, contnode + ) + results.append((domain_role, ref_node)) + + # finally resolve for any other type of allowed reference domain + for domain in self.env.domains.values(): + if domain.name == "std": + continue # we did this one already + if ref_domains is not None and domain.name not in ref_domains: + continue + try: + results.extend( + domain.resolve_any_xref( + self.env, refdoc, self.app.builder, target, node, contnode + ) + ) + except NotImplementedError: + # the domain doesn't yet support the new interface + # we have to manually collect possible references (SLOW) + if not (getattr(domain, "__module__", "").startswith("sphinx.")): + logger.warning( + f"Domain '{domain.__module__}::{domain.name}' has not " + "implemented a `resolve_any_xref` method [myst.domains]", + type="myst", + subtype="domains", + once=True, + ) + for role in domain.roles: + res = domain.resolve_xref( + self.env, refdoc, self.app.builder, role, target, node, contnode + ) + if res and len(res) and isinstance(res[0], nodes.Element): + results.append((f"{domain.name}:{role}", res)) + + # now, see how many matches we got... + if not results: + return None + if len(results) > 1: + + def stringify(name, node): + reftitle = node.get("reftitle", node.astext()) + return f":{name}:`{reftitle}`" + + candidates = " or ".join(stringify(name, role) for name, role in results) + logger.warning( + __( + f"more than one target found for 'myst' cross-reference {target}: " + f"could be {candidates} [myst.ref]" + ), + location=node, + type="myst", + subtype="ref", + ) + + res_role, newnode = results[0] + # Override "myst" class with the actual role type to get the styling + # approximately correct. 
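+        # e.g. a resolved role of "std:doc" adds the classes
+        # ["std", "std-doc"] to the reference's first child element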
+ res_domain = res_role.split(":")[0] + if len(newnode) > 0 and isinstance(newnode[0], nodes.Element): + newnode[0]["classes"] = newnode[0].get("classes", []) + [ + res_domain, + res_role.replace(":", "-"), + ] + + return newnode + + def _resolve_anchor( + self, node: pending_xref, fromdocname: str + ) -> Optional[Element]: + """Resolve doc with anchor.""" + if self.env.config.myst_heading_anchors is None: + # no target anchors will have been created, so we don't look for them + return None + target = node["reftarget"] # type: str + if "#" not in target: + return None + # the link may be a heading anchor; we need to first get the relative path + rel_path, anchor = target.rsplit("#", 1) + rel_path = os.path.normpath(rel_path) + if rel_path == ".": + # anchor in the same doc as the node + doc_path = self.env.doc2path(node.get("refdoc", fromdocname), base=False) + else: + # anchor in a different doc from the node + doc_path = os.path.normpath( + os.path.join(node.get("refdoc", fromdocname), "..", rel_path) + ) + return self._resolve_ref_nested(node, fromdocname, doc_path + "#" + anchor) + + def _resolve_ref_nested( + self, node: pending_xref, fromdocname: str, target=None + ) -> Optional[Element]: + """This is the same as ``sphinx.domains.std._resolve_ref_xref``, + but allows for nested syntax, rather than converting the inner node to raw text. + """ + stddomain = cast(StandardDomain, self.env.get_domain("std")) + target = target or node["reftarget"].lower() + + if node["refexplicit"]: + # reference to anonymous label; the reference uses + # the supplied link caption + docname, labelid = stddomain.anonlabels.get(target, ("", "")) + sectname = node.astext() + innernode = nodes.inline(sectname, "") + innernode.extend(node[0].children) + else: + # reference to named label; the final node will + # contain the section name after the label + docname, labelid, sectname = stddomain.labels.get(target, ("", "", "")) + innernode = nodes.inline(sectname, sectname) + + if not docname: + return None + + assert self.app.builder + return make_refnode(self.app.builder, fromdocname, docname, labelid, innernode) + + def _resolve_doc_nested( + self, node: pending_xref, fromdocname: str + ) -> Optional[Element]: + """This is the same as ``sphinx.domains.std._resolve_doc_xref``, + but allows for nested syntax, rather than converting the inner node to raw text. + + It also allows for extensions on document names. + """ + # directly reference to document by source name; can be absolute or relative + refdoc = node.get("refdoc", fromdocname) + docname = docname_join(refdoc, node["reftarget"]) + + if docname not in self.env.all_docs: + # try stripping known extensions from doc name + if os.path.splitext(docname)[1] in self.env.config.source_suffix: + docname = os.path.splitext(docname)[0] + if docname not in self.env.all_docs: + return None + + if node["refexplicit"]: + # reference with explicit title + caption = node.astext() + innernode = nodes.inline(caption, "", classes=["doc"]) + innernode.extend(node[0].children) + else: + # TODO do we want nested syntax for titles? + caption = clean_astext(self.env.titles[docname]) + innernode = nodes.inline(caption, caption, classes=["doc"]) + + assert self.app.builder + return make_refnode(self.app.builder, fromdocname, docname, "", innernode) -- cgit v1.2.3