summaryrefslogtreecommitdiffstats
path: root/myst_parser/config
diff options
context:
space:
mode:
Diffstat (limited to 'myst_parser/config')
-rw-r--r-- myst_parser/config/__init__.py 1
-rw-r--r-- myst_parser/config/dc_validators.py 161
-rw-r--r-- myst_parser/config/main.py 409
3 files changed, 571 insertions, 0 deletions
diff --git a/myst_parser/config/__init__.py b/myst_parser/config/__init__.py
new file mode 100644
index 0000000..898f9ce
--- /dev/null
+++ b/myst_parser/config/__init__.py
@@ -0,0 +1 @@
+"""This module holds the global configuration for the parser ``MdParserConfig``."""
diff --git a/myst_parser/config/dc_validators.py b/myst_parser/config/dc_validators.py
new file mode 100644
index 0000000..765cfb9
--- /dev/null
+++ b/myst_parser/config/dc_validators.py
@@ -0,0 +1,161 @@
+"""Validators for dataclasses, mirroring those of https://github.com/python-attrs/attrs."""
+from __future__ import annotations
+
+import dataclasses as dc
+from typing import Any, Sequence
+
+from typing_extensions import Protocol
+
+
def validate_field(inst: Any, field: dc.Field, value: Any) -> None:
    """Run the validator(s) attached to a dataclass field against a value.

    Validators are looked up under the ``"validator"`` key of
    ``field.metadata``. The entry may be a single callable or a ``list`` of
    callables; each is invoked as ``validator(inst, field, value)`` and is
    expected to raise an exception if the value is invalid.

    Fields that carry no ``"validator"`` entry are accepted as-is.
    """
    try:
        checks = field.metadata["validator"]
    except KeyError:
        # nothing registered for this field
        return

    # normalise the single-callable form so both cases share one loop
    if not isinstance(checks, list):
        checks = [checks]

    for check in checks:
        check(inst, field, value)
+
+
def validate_fields(inst: Any) -> None:
    """Validate every field of a dataclass instance.

    Each field reported by ``dataclasses.fields`` is passed, together with
    its current value on ``inst``, to ``validate_field``, which applies the
    ``validator`` function(s) stored in the field metadata.

    Intended to be called from the dataclass ``__post_init__``.

    A validator takes ``(inst, field, value)`` and raises an exception if
    the value is invalid.
    """
    for spec in dc.fields(inst):
        current = getattr(inst, spec.name)
        validate_field(inst, spec, current)
+
+
class ValidatorType(Protocol):
    """Structural type of a field validator.

    A validator is called with the object being validated, the dataclass
    field the value belongs to, and the value itself. ``suffix`` is appended
    to the field name in error messages, so nested validators can report the
    path to the offending element (e.g. ``[0]`` or ``['key']``).

    A validator returns ``None`` and signals an invalid value by raising.
    """

    def __call__(
        self, inst: Any, field: dc.Field, value: Any, suffix: str = ""
    ) -> None:
        ...
+
+
def instance_of(type: type[Any] | tuple[type[Any], ...]) -> ValidatorType:
    """
    Build a validator that enforces an `isinstance` check.

    The returned validator raises a `TypeError` when the value is not an
    instance of ``type``. Since the check is performed with `isinstance`,
    a tuple of types may be given as well.

    :param type: The type (or tuple of types) to check for.
    """

    def _validator(inst, field, value, suffix=""):
        """Raise `TypeError` unless ``value`` is an instance of ``type``."""
        if isinstance(value, type):
            return
        raise TypeError(
            f"'{field.name}{suffix}' must be of type {type!r} "
            f"(got {value!r} that is a {value.__class__!r})."
        )

    return _validator
+
+
def optional(validator: ValidatorType) -> ValidatorType:
    """
    Wrap a validator so that ``None`` is also accepted.

    The returned validator passes ``None`` through without calling the
    wrapped validator; any other value is forwarded to ``validator``
    (including the ``suffix``) and must satisfy it.

    :param validator: The sub-validator applied to non-``None`` values.
    """

    def _validator(inst, field, value, suffix=""):
        if value is not None:
            validator(inst, field, value, suffix=suffix)

    return _validator
+
+
def is_callable(inst, field, value, suffix=""):
    """
    Validator that requires the value to be callable.

    Raises a `TypeError` naming the field (plus ``suffix``) when
    ``callable(value)`` is false; otherwise returns ``None``.
    """
    if callable(value):
        return
    raise TypeError(
        f"'{field.name}{suffix}' must be callable "
        f"(got {value!r} that is a {value.__class__!r})."
    )
+
+
def in_(options: Sequence) -> ValidatorType:
    """
    Build a validator that restricts the value to a set of options.

    The returned validator raises a `ValueError` when ``value in options``
    is false. A `TypeError` raised by the membership test itself is treated
    as "not a member".

    :param options: Allowed options.
    """

    def _validator(inst, field, value, suffix=""):
        try:
            allowed = value in options
        except TypeError:  # e.g. `1 in "abc"`
            allowed = False

        if allowed:
            return
        raise ValueError(
            f"'{field.name}{suffix}' must be in {options!r} (got {value!r})"
        )

    return _validator
+
+
def deep_iterable(
    member_validator: ValidatorType, iterable_validator: ValidatorType | None = None
) -> ValidatorType:
    """
    Build a validator that checks an iterable and each of its members.

    If ``iterable_validator`` is given it is applied to the iterable first.
    ``member_validator`` is then applied to each member in turn, with
    ``[<index>]`` appended to the suffix so errors identify the position.

    :param member_validator: Validator to apply to iterable members
    :param iterable_validator: Validator to apply to iterable itself
    """

    def _validator(inst, field, value, suffix=""):
        # check the container before walking into it
        if iterable_validator is not None:
            iterable_validator(inst, field, value, suffix=suffix)

        for position, item in enumerate(value):
            member_validator(inst, field, item, suffix=f"{suffix}[{position}]")

    return _validator
+
+
def deep_mapping(
    key_validator: ValidatorType,
    value_validator: ValidatorType,
    mapping_validator: ValidatorType | None = None,
) -> ValidatorType:
    """
    A validator that performs deep validation of a dictionary.

    If ``mapping_validator`` is given it is applied to the mapping first.
    Then, for each key, ``key_validator`` is applied to the key and
    ``value_validator`` to the corresponding value, both with
    ``[<key repr>]`` appended to the suffix so errors identify the entry.

    :param key_validator: Validator to apply to dictionary keys
    :param value_validator: Validator to apply to dictionary values
    :param mapping_validator: Validator to apply to top-level mapping attribute (optional)
    """

    def _validator(inst, field: dc.Field, value, suffix=""):
        if mapping_validator is not None:
            # forward the suffix so that, when this validator is itself
            # nested, the error message keeps the path to the mapping
            mapping_validator(inst, field, value, suffix=suffix)

        for key in value:
            entry_suffix = f"{suffix}[{key!r}]"
            key_validator(inst, field, key, suffix=entry_suffix)
            value_validator(inst, field, value[key], suffix=entry_suffix)

    return _validator
diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py
new file mode 100644
index 0000000..a134ea7
--- /dev/null
+++ b/myst_parser/config/main.py
@@ -0,0 +1,409 @@
+"""The configuration for the myst parser."""
+import dataclasses as dc
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ Optional,
+ Sequence,
+ Tuple,
+ Union,
+ cast,
+)
+
+from .dc_validators import (
+ deep_iterable,
+ deep_mapping,
+ in_,
+ instance_of,
+ is_callable,
+ optional,
+ validate_field,
+ validate_fields,
+)
+
+
def check_extensions(_, __, value):
    """Validator for the ``enable_extensions`` option.

    Raises a `TypeError` if ``value`` is not iterable, and a `ValueError`
    if it contains any item that is not one of the recognised extension
    names listed below. The first two (unused) arguments are the instance
    and field passed to every validator.
    """
    if not isinstance(value, Iterable):
        raise TypeError(f"'enable_extensions' not iterable: {value}")

    recognised = (
        "amsmath",
        "attrs_image",
        "colon_fence",
        "deflist",
        "dollarmath",
        "fieldlist",
        "html_admonition",
        "html_image",
        "linkify",
        "replacements",
        "smartquotes",
        "strikethrough",
        "substitution",
        "tasklist",
    )
    diff = set(value).difference(recognised)
    if not diff:
        return
    raise ValueError(f"'enable_extensions' items not recognised: {diff}")
+
+
def check_sub_delimiters(_, __, value):
    """Validator for the ``sub_delimiters`` option.

    Raises a `TypeError` unless ``value`` is a tuple or list holding exactly
    two items, each of which is a string of length 1. The first two (unused)
    arguments are the instance and field passed to every validator.
    """
    is_pair = isinstance(value, (tuple, list)) and len(value) == 2
    if not is_pair:
        raise TypeError(f"myst_sub_delimiters is not a tuple of length 2: {value}")

    if not all(isinstance(delim, str) and len(delim) == 1 for delim in value):
        raise TypeError(
            f"myst_sub_delimiters does not contain strings of length 1: {value}"
        )
+
+
@dc.dataclass()
class MdParserConfig:
    """Configuration options for the Markdown Parser.

    Every option is a dataclass field whose ``metadata`` carries a
    ``validator`` (run for all fields in ``__post_init__``) and a ``help``
    string. Some fields carry further metadata flags such as ``extension``,
    ``global_only``, ``docutils_only`` and ``merge_topmatter``.

    Note in the sphinx configuration these option names are prepended with ``myst_``
    """

    # TODO replace commonmark_only, gfm_only with a single option

    # --- parser flavour ---

    commonmark_only: bool = dc.field(
        default=False,
        metadata={
            "validator": instance_of(bool),
            "help": "Use strict CommonMark parser",
        },
    )
    gfm_only: bool = dc.field(
        default=False,
        metadata={
            "validator": instance_of(bool),
            "help": "Use strict Github Flavoured Markdown parser",
        },
    )

    # --- syntax selection ---

    enable_extensions: Sequence[str] = dc.field(
        default_factory=list,
        metadata={"validator": check_extensions, "help": "Enable syntax extensions"},
    )

    disable_syntax: Iterable[str] = dc.field(
        default_factory=list,
        metadata={
            "validator": deep_iterable(instance_of(str), instance_of((list, tuple))),
            "help": "Disable Commonmark syntax elements",
        },
    )

    # --- links and references ---

    all_links_external: bool = dc.field(
        default=False,
        metadata={
            "validator": instance_of(bool),
            "help": "Parse all links as simple hyperlinks",
        },
    )

    # see https://en.wikipedia.org/wiki/List_of_URI_schemes
    url_schemes: Optional[Iterable[str]] = dc.field(
        default=cast(Optional[Iterable[str]], ("http", "https", "mailto", "ftp")),
        metadata={
            "validator": optional(
                deep_iterable(instance_of(str), instance_of((list, tuple)))
            ),
            "help": "URL scheme prefixes identified as external links",
        },
    )

    ref_domains: Optional[Iterable[str]] = dc.field(
        default=None,
        metadata={
            "validator": optional(
                deep_iterable(instance_of(str), instance_of((list, tuple)))
            ),
            "help": "Sphinx domain names to search in for link references",
        },
    )

    # --- code blocks ---

    highlight_code_blocks: bool = dc.field(
        default=True,
        metadata={
            "validator": instance_of(bool),
            "help": "Syntax highlight code blocks with pygments",
            "docutils_only": True,
        },
    )

    number_code_blocks: Sequence[str] = dc.field(
        default_factory=list,
        metadata={
            "validator": deep_iterable(instance_of(str), instance_of((list, tuple))),
            "help": "Add line numbers to code blocks with these languages",
        },
    )

    # --- headings ---

    title_to_header: bool = dc.field(
        default=False,
        metadata={
            "validator": instance_of(bool),
            "help": "Convert a `title` field in the top-matter to a H1 header",
        },
    )

    heading_anchors: Optional[int] = dc.field(
        default=None,
        metadata={
            "validator": optional(in_([1, 2, 3, 4, 5, 6, 7])),
            "help": "Heading level depth to assign HTML anchors",
        },
    )

    heading_slug_func: Optional[Callable[[str], str]] = dc.field(
        default=None,
        metadata={
            "validator": optional(is_callable),
            "help": "Function for creating heading anchors",
            "global_only": True,
        },
    )

    # --- document-level output ---

    html_meta: Dict[str, str] = dc.field(
        default_factory=dict,
        repr=False,
        metadata={
            "validator": deep_mapping(
                instance_of(str), instance_of(str), instance_of(dict)
            ),
            "merge_topmatter": True,
            "help": "HTML meta tags",
        },
    )

    footnote_transition: bool = dc.field(
        default=True,
        metadata={
            "validator": instance_of(bool),
            "help": "Place a transition before any footnotes",
        },
    )

    words_per_minute: int = dc.field(
        default=200,
        metadata={
            "validator": instance_of(int),
            "help": "For reading speed calculations",
        },
    )

    # Extension specific

    # NOTE(review): the "extension" value below is "substitutions", whereas
    # check_extensions recognises the name "substitution" -- confirm intended.
    substitutions: Dict[str, Union[str, int, float]] = dc.field(
        default_factory=dict,
        repr=False,
        metadata={
            "validator": deep_mapping(
                instance_of(str), instance_of((str, int, float)), instance_of(dict)
            ),
            "merge_topmatter": True,
            "help": "Substitutions mapping",
            "extension": "substitutions",
        },
    )

    sub_delimiters: Tuple[str, str] = dc.field(
        default=("{", "}"),
        metadata={
            "validator": check_sub_delimiters,
            "help": "Substitution delimiters",
            "extension": "substitutions",
        },
    )

    linkify_fuzzy_links: bool = dc.field(
        default=True,
        metadata={
            "validator": instance_of(bool),
            "help": "Recognise URLs without schema prefixes",
            "extension": "linkify",
        },
    )

    dmath_allow_labels: bool = dc.field(
        default=True,
        metadata={
            "validator": instance_of(bool),
            "help": "Parse `$$...$$ (label)`",
            "extension": "dollarmath",
        },
    )
    dmath_allow_space: bool = dc.field(
        default=True,
        metadata={
            "validator": instance_of(bool),
            "help": "Allow initial/final spaces in `$ ... $`",
            "extension": "dollarmath",
        },
    )
    dmath_allow_digits: bool = dc.field(
        default=True,
        metadata={
            "validator": instance_of(bool),
            "help": "Allow initial/final digits `1$ ...$2`",
            "extension": "dollarmath",
        },
    )
    dmath_double_inline: bool = dc.field(
        default=False,
        metadata={
            "validator": instance_of(bool),
            "help": "Parse inline `$$ ... $$`",
            "extension": "dollarmath",
        },
    )

    update_mathjax: bool = dc.field(
        default=True,
        metadata={
            "validator": instance_of(bool),
            "help": "Update sphinx.ext.mathjax configuration to ignore `$` delimiters",
            "extension": "dollarmath",
            "global_only": True,
        },
    )

    mathjax_classes: str = dc.field(
        default="tex2jax_process|mathjax_process|math|output_area",
        metadata={
            "validator": instance_of(str),
            "help": "MathJax classes to add to math HTML",
            "extension": "dollarmath",
            "global_only": True,
        },
    )

    def __post_init__(self):
        """Validate all fields after the generated ``__init__`` has run."""
        validate_fields(self)

    def copy(self, **kwargs: Any) -> "MdParserConfig":
        """Return a new config, with the given fields replaced by new values.

        Note: the copy is created via ``dataclasses.replace``, so it is
        initialised (and therefore validated) like any new instance.
        """
        duplicate = dc.replace(self, **kwargs)
        return duplicate

    @classmethod
    def get_fields(cls) -> Tuple[dc.Field, ...]:
        """Return the dataclass fields declared on this class."""
        return dc.fields(cls)

    def as_dict(self, dict_factory=dict) -> dict:
        """Return a mapping of field name -> value, built by ``dict_factory``."""
        return dc.asdict(self, dict_factory=dict_factory)

    def as_triple(self) -> Iterable[Tuple[str, Any, dc.Field]]:
        """Yield a (name, value, field) triple for every field."""
        # index the field objects by name, to pair them with their values
        by_name = {}
        for spec in dc.fields(self.__class__):
            by_name[spec.name] = spec

        values = dc.asdict(self)
        for name in values:
            yield name, values[name], by_name[name]
+
+
def merge_file_level(
    config: MdParserConfig,
    topmatter: Dict[str, Any],
    warning: Callable[[str, str], None],
) -> MdParserConfig:
    """Merge the file-level topmatter with the global config.

    Updates are read from the ``myst`` key of the topmatter. For
    back-compatibility, top-level ``html_meta`` and ``substitutions`` keys
    are also accepted (with a deprecation warning) and take precedence over
    the same keys under ``myst``.

    Each update is validated individually; unknown fields and invalid values
    are reported through ``warning`` and skipped, rather than raised.
    Fields flagged ``merge_topmatter`` in their metadata are merged with the
    global value (topmatter entries win) instead of replacing it.

    Neither ``config`` nor ``topmatter`` is modified.

    :param config: Global config.
    :param topmatter: Topmatter from the file.
    :param warning: Function to call with a warning (type, message).
    :returns: A new config object
    """
    # get updates
    updates: Dict[str, Any] = {}
    myst = topmatter.get("myst", {})
    if not isinstance(myst, dict):
        warning("topmatter", f"'myst' key not a dict: {type(myst)}")
    else:
        # shallow copy: the deprecated top-level keys are added to `updates`
        # below, and that must not write into the caller's topmatter["myst"]
        updates = dict(myst)

    # allow html_meta and substitutions at top-level for back-compatibility
    for legacy_key in ("html_meta", "substitutions"):
        if legacy_key in topmatter:
            warning(
                "topmatter",
                f"top-level '{legacy_key}' key is deprecated, "
                "place under 'myst' key instead",
            )
            updates[legacy_key] = topmatter[legacy_key]

    new = config.copy()

    # validate each update
    fields = {name: (value, field) for name, value, field in config.as_triple()}
    for name, value in updates.items():

        if name not in fields:
            warning("topmatter", f"Unknown field: {name}")
            continue

        old_value, field = fields[name]

        try:
            validate_field(new, field, value)
        except Exception as exc:
            # deliberately broad: any validator failure becomes a warning
            # and the offending update is ignored
            warning("topmatter", str(exc))
            continue

        if field.metadata.get("merge_topmatter"):
            # topmatter entries override the global ones, key by key
            value = {**old_value, **value}

        setattr(new, name, value)

    return new
+
+
class TopmatterReadError(Exception):
    """Error raised when the topmatter of a source file cannot be read."""
+
+
def read_topmatter(text: Union[str, Iterator[str]]) -> Optional[Dict[str, Any]]:
    """Read the (optional) YAML topmatter from a source string.

    This is identified by the first line starting with `---`,
    then read up to a terminating line of `---`, or `...`.

    :param text: The source to read from; either the full string,
        or an iterator over its lines
    :return: The topmatter, or ``None`` if the source does not start with
        a `---` line
    :raises TopmatterReadError: If the topmatter is not valid YAML,
        or does not parse to a dict
    """
    if isinstance(text, str):
        if not text.startswith("---"):  # skip creating the line list in memory
            return None
        text = iter(text.splitlines())

    # consume the opening delimiter line
    try:
        if not next(text).startswith("---"):
            return None
    except StopIteration:
        return None

    top_matter = []
    for line in text:
        if line.startswith(("---", "...")):
            break
        top_matter.append(line.rstrip() + "\n")

    # imported lazily, and only once there is topmatter to parse
    import yaml

    try:
        metadata = yaml.safe_load("".join(top_matter))
    except yaml.YAMLError as err:
        # YAMLError is the base class of the parser/scanner errors
        raise TopmatterReadError("Malformed YAML") from err

    # explicit check rather than an `assert`, which would surface as an
    # AssertionError (or be stripped entirely under `python -O`)
    if not isinstance(metadata, dict):
        raise TopmatterReadError(f"YAML is not a dict: {type(metadata)}")
    return metadata