summaryrefslogtreecommitdiffstats
path: root/myst_parser/parsers/docutils_.py
diff options
context:
space:
mode:
Diffstat (limited to 'myst_parser/parsers/docutils_.py')
-rw-r--r--myst_parser/parsers/docutils_.py275
1 files changed, 275 insertions, 0 deletions
diff --git a/myst_parser/parsers/docutils_.py b/myst_parser/parsers/docutils_.py
new file mode 100644
index 0000000..aaef5e2
--- /dev/null
+++ b/myst_parser/parsers/docutils_.py
@@ -0,0 +1,275 @@
+"""MyST Markdown parser for docutils."""
+from dataclasses import Field
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+
+from docutils import frontend, nodes
+from docutils.core import default_description, publish_cmdline
+from docutils.parsers.rst import Parser as RstParser
+from typing_extensions import Literal, get_args, get_origin
+
+from myst_parser.config.main import (
+ MdParserConfig,
+ TopmatterReadError,
+ merge_file_level,
+ read_topmatter,
+)
+from myst_parser.mdit_to_docutils.base import DocutilsRenderer, create_warning
+from myst_parser.parsers.mdit import create_md_parser
+
+
+def _validate_int(
+ setting, value, option_parser, config_parser=None, config_section=None
+) -> int:
+ """Validate an integer setting."""
+ return int(value)
+
+
+def _create_validate_tuple(length: int) -> Callable[..., Tuple[str, ...]]:
+ """Create a validator for a tuple of length `length`."""
+
+ def _validate(
+ setting, value, option_parser, config_parser=None, config_section=None
+ ):
+ string_list = frontend.validate_comma_separated_list(
+ setting, value, option_parser, config_parser, config_section
+ )
+ if len(string_list) != length:
+ raise ValueError(
+ f"Expecting {length} items in {setting}, got {len(string_list)}."
+ )
+ return tuple(string_list)
+
+ return _validate
+
+
+class Unset:
+ """A sentinel class for unset settings."""
+
+ def __repr__(self):
+ return "UNSET"
+
+
+DOCUTILS_UNSET = Unset()
+"""Sentinel for arguments not set through docutils.conf."""
+
+
+DOCUTILS_EXCLUDED_ARGS = (
+ # docutils.conf can't represent callables
+ "heading_slug_func",
+ # docutils.conf can't represent dicts
+ "html_meta",
+ "substitutions",
+ # we can't add substitutions so not needed
+ "sub_delimiters",
+ # sphinx only options
+ "heading_anchors",
+ "ref_domains",
+ "update_mathjax",
+ "mathjax_classes",
+)
+"""Names of settings that cannot be set in docutils.conf."""
+
+
+def _attr_to_optparse_option(at: Field, default: Any) -> Tuple[dict, str]:
+ """Convert a field into a Docutils optparse options dict."""
+ if at.type is int:
+ return {"metavar": "<int>", "validator": _validate_int}, f"(default: {default})"
+ if at.type is bool:
+ return {
+ "metavar": "<boolean>",
+ "validator": frontend.validate_boolean,
+ }, f"(default: {default})"
+ if at.type is str:
+ return {
+ "metavar": "<str>",
+ }, f"(default: '{default}')"
+ if get_origin(at.type) is Literal and all(
+ isinstance(a, str) for a in get_args(at.type)
+ ):
+ args = get_args(at.type)
+ return {
+ "metavar": f"<{'|'.join(repr(a) for a in args)}>",
+ "type": "choice",
+ "choices": args,
+ }, f"(default: {default!r})"
+ if at.type in (Iterable[str], Sequence[str]):
+ return {
+ "metavar": "<comma-delimited>",
+ "validator": frontend.validate_comma_separated_list,
+ }, f"(default: '{','.join(default)}')"
+ if at.type == Tuple[str, str]:
+ return {
+ "metavar": "<str,str>",
+ "validator": _create_validate_tuple(2),
+ }, f"(default: '{','.join(default)}')"
+ if at.type == Union[int, type(None)]:
+ return {
+ "metavar": "<null|int>",
+ "validator": _validate_int,
+ }, f"(default: {default})"
+ if at.type == Union[Iterable[str], type(None)]:
+ default_str = ",".join(default) if default else ""
+ return {
+ "metavar": "<null|comma-delimited>",
+ "validator": frontend.validate_comma_separated_list,
+ }, f"(default: {default_str!r})"
+ raise AssertionError(
+ f"Configuration option {at.name} not set up for use in docutils.conf."
+ )
+
+
+def attr_to_optparse_option(
+ attribute: Field, default: Any, prefix: str = "myst_"
+) -> Tuple[str, List[str], Dict[str, Any]]:
+ """Convert an ``MdParserConfig`` attribute into a Docutils setting tuple.
+
+ :returns: A tuple of ``(help string, option flags, optparse kwargs)``.
+ """
+ name = f"{prefix}{attribute.name}"
+ flag = "--" + name.replace("_", "-")
+ options = {"dest": name, "default": DOCUTILS_UNSET}
+ at_options, type_str = _attr_to_optparse_option(attribute, default)
+ options.update(at_options)
+ help_str = attribute.metadata.get("help", "") if attribute.metadata else ""
+ return (f"{help_str} {type_str}", [flag], options)
+
+
+def create_myst_settings_spec(
+ excluded: Sequence[str], config_cls=MdParserConfig, prefix: str = "myst_"
+):
+ """Return a list of Docutils setting for the docutils MyST section."""
+ defaults = config_cls()
+ return tuple(
+ attr_to_optparse_option(at, getattr(defaults, at.name), prefix)
+ for at in config_cls.get_fields()
+ if at.name not in excluded
+ )
+
+
+def create_myst_config(
+ settings: frontend.Values,
+ excluded: Sequence[str],
+ config_cls=MdParserConfig,
+ prefix: str = "myst_",
+):
+ """Create a configuration instance from the given settings."""
+ values = {}
+ for attribute in config_cls.get_fields():
+ if attribute.name in excluded:
+ continue
+ setting = f"{prefix}{attribute.name}"
+ val = getattr(settings, setting, DOCUTILS_UNSET)
+ if val is not DOCUTILS_UNSET:
+ values[attribute.name] = val
+ return config_cls(**values)
+
+
+class Parser(RstParser):
+ """Docutils parser for Markedly Structured Text (MyST)."""
+
+ supported: Tuple[str, ...] = ("md", "markdown", "myst")
+ """Aliases this parser supports."""
+
+ settings_spec = (
+ "MyST options",
+ None,
+ create_myst_settings_spec(DOCUTILS_EXCLUDED_ARGS),
+ *RstParser.settings_spec,
+ )
+ """Runtime settings specification."""
+
+ config_section = "myst parser"
+ config_section_dependencies = ("parsers",)
+ translate_section_name = None
+
+ def parse(self, inputstring: str, document: nodes.document) -> None:
+ """Parse source text.
+
+ :param inputstring: The source string to parse
+ :param document: The root docutils node to add AST elements to
+ """
+
+ self.setup_parse(inputstring, document)
+
+ # check for exorbitantly long lines
+ if hasattr(document.settings, "line_length_limit"):
+ for i, line in enumerate(inputstring.split("\n")):
+ if len(line) > document.settings.line_length_limit:
+ error = document.reporter.error(
+ f"Line {i+1} exceeds the line-length-limit:"
+ f" {document.settings.line_length_limit}."
+ )
+ document.append(error)
+ return
+
+ # create parsing configuration from the global config
+ try:
+ config = create_myst_config(document.settings, DOCUTILS_EXCLUDED_ARGS)
+ except Exception as exc:
+ error = document.reporter.error(f"Global myst configuration invalid: {exc}")
+ document.append(error)
+ config = MdParserConfig()
+
+ # update the global config with the file-level config
+ try:
+ topmatter = read_topmatter(inputstring)
+ except TopmatterReadError:
+ pass # this will be reported during the render
+ else:
+ if topmatter:
+ warning = lambda wtype, msg: create_warning( # noqa: E731
+ document, msg, line=1, append_to=document, subtype=wtype
+ )
+ config = merge_file_level(config, topmatter, warning)
+
+ # parse content
+ parser = create_md_parser(config, DocutilsRenderer)
+ parser.options["document"] = document
+ parser.render(inputstring)
+
+ # post-processing
+
+ # replace raw nodes if raw is not allowed
+ if not getattr(document.settings, "raw_enabled", True):
+ for node in document.traverse(nodes.raw):
+ warning = document.reporter.warning("Raw content disabled.")
+ node.parent.replace(node, warning)
+
+ self.finish_parse()
+
+
+def _run_cli(writer_name: str, writer_description: str, argv: Optional[List[str]]):
+ """Run the command line interface for a particular writer."""
+ publish_cmdline(
+ parser=Parser(),
+ writer_name=writer_name,
+ description=(
+ f"Generates {writer_description} from standalone MyST sources.\n{default_description}"
+ ),
+ argv=argv,
+ )
+
+
+def cli_html(argv: Optional[List[str]] = None) -> None:
+ """Cmdline entrypoint for converting MyST to HTML."""
+ _run_cli("html", "(X)HTML documents", argv)
+
+
+def cli_html5(argv: Optional[List[str]] = None):
+ """Cmdline entrypoint for converting MyST to HTML5."""
+ _run_cli("html5", "HTML5 documents", argv)
+
+
+def cli_latex(argv: Optional[List[str]] = None):
+ """Cmdline entrypoint for converting MyST to LaTeX."""
+ _run_cli("latex", "LaTeX documents", argv)
+
+
+def cli_xml(argv: Optional[List[str]] = None):
+ """Cmdline entrypoint for converting MyST to XML."""
+ _run_cli("xml", "Docutils-native XML", argv)
+
+
+def cli_pseudoxml(argv: Optional[List[str]] = None):
+ """Cmdline entrypoint for converting MyST to pseudo-XML."""
+ _run_cli("pseudoxml", "pseudo-XML", argv)