diff options
Diffstat (limited to 'myst_parser/config')
-rw-r--r-- | myst_parser/config/__init__.py | 1 | ||||
-rw-r--r-- | myst_parser/config/dc_validators.py | 161 | ||||
-rw-r--r-- | myst_parser/config/main.py | 409 |
3 files changed, 571 insertions, 0 deletions
diff --git a/myst_parser/config/__init__.py b/myst_parser/config/__init__.py new file mode 100644 index 0000000..898f9ce --- /dev/null +++ b/myst_parser/config/__init__.py @@ -0,0 +1 @@ +"""This module holds the global configuration for the parser ``MdParserConfig``.""" diff --git a/myst_parser/config/dc_validators.py b/myst_parser/config/dc_validators.py new file mode 100644 index 0000000..765cfb9 --- /dev/null +++ b/myst_parser/config/dc_validators.py @@ -0,0 +1,161 @@ +"""Validators for dataclasses, mirroring those of https://github.com/python-attrs/attrs.""" +from __future__ import annotations + +import dataclasses as dc +from typing import Any, Sequence + +from typing_extensions import Protocol + + +def validate_field(inst: Any, field: dc.Field, value: Any) -> None: + """Validate the field of a dataclass, + according to a `validator` function set in the field.metadata. + + The validator function should take as input (inst, field, value) and + raise an exception if the value is invalid. + """ + if "validator" not in field.metadata: + return + if isinstance(field.metadata["validator"], list): + for validator in field.metadata["validator"]: + validator(inst, field, value) + else: + field.metadata["validator"](inst, field, value) + + +def validate_fields(inst: Any) -> None: + """Validate the fields of a dataclass, + according to `validator` functions set in the field metadata. + + This function should be called in the `__post_init__` of the dataclass. + + The validator function should take as input (inst, field, value) and + raise an exception if the value is invalid. + """ + for field in dc.fields(inst): + validate_field(inst, field, getattr(inst, field.name)) + + +class ValidatorType(Protocol): + def __call__( + self, inst: bytes, field: dc.Field, value: Any, suffix: str = "" + ) -> None: + ... + + +def instance_of(type: type[Any] | tuple[type[Any], ...]) -> ValidatorType: + """ + A validator that raises a `TypeError` if the initializer is called + with a wrong type for this particular attribute (checks are performed using + `isinstance` therefore it's also valid to pass a tuple of types). + + :param type: The type to check for. + """ + + def _validator(inst, field, value, suffix=""): + """ + We use a callable class to be able to change the ``__repr__``. + """ + if not isinstance(value, type): + raise TypeError( + f"'{field.name}{suffix}' must be of type {type!r} " + f"(got {value!r} that is a {value.__class__!r})." + ) + + return _validator + + +def optional(validator: ValidatorType) -> ValidatorType: + """ + A validator that makes an attribute optional. An optional attribute is one + which can be set to ``None`` in addition to satisfying the requirements of + the sub-validator. + """ + + def _validator(inst, field, value, suffix=""): + if value is None: + return + + validator(inst, field, value, suffix=suffix) + + return _validator + + +def is_callable(inst, field, value, suffix=""): + """ + A validator that raises a `TypeError` if the + initializer is called with a value for this particular attribute + that is not callable. + """ + if not callable(value): + raise TypeError( + f"'{field.name}{suffix}' must be callable " + f"(got {value!r} that is a {value.__class__!r})." + ) + + +def in_(options: Sequence) -> ValidatorType: + """ + A validator that raises a `ValueError` if the initializer is called + with a value that does not belong in the options provided. The check is + performed using ``value in options``. + + :param options: Allowed options. + """ + + def _validator(inst, field, value, suffix=""): + try: + in_options = value in options + except TypeError: # e.g. `1 in "abc"` + in_options = False + + if not in_options: + raise ValueError( + f"'{field.name}{suffix}' must be in {options!r} (got {value!r})" + ) + + return _validator + + +def deep_iterable( + member_validator: ValidatorType, iterable_validator: ValidatorType | None = None +) -> ValidatorType: + """ + A validator that performs deep validation of an iterable. + + :param member_validator: Validator to apply to iterable members + :param iterable_validator: Validator to apply to iterable itself + """ + + def _validator(inst, field, value, suffix=""): + if iterable_validator is not None: + iterable_validator(inst, field, value, suffix=suffix) + + for idx, member in enumerate(value): + member_validator(inst, field, member, suffix=f"{suffix}[{idx}]") + + return _validator + + +def deep_mapping( + key_validator: ValidatorType, + value_validator: ValidatorType, + mapping_validator: ValidatorType | None = None, +) -> ValidatorType: + """ + A validator that performs deep validation of a dictionary. + + :param key_validator: Validator to apply to dictionary keys + :param value_validator: Validator to apply to dictionary values + :param mapping_validator: Validator to apply to top-level mapping attribute (optional) + """ + + def _validator(inst, field: dc.Field, value, suffix=""): + if mapping_validator is not None: + mapping_validator(inst, field, value) + + for key in value: + key_validator(inst, field, key, suffix=f"{suffix}[{key!r}]") + value_validator(inst, field, value[key], suffix=f"{suffix}[{key!r}]") + + return _validator diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py new file mode 100644 index 0000000..a134ea7 --- /dev/null +++ b/myst_parser/config/main.py @@ -0,0 +1,409 @@ +"""The configuration for the myst parser.""" +import dataclasses as dc +from typing import ( + Any, + Callable, + Dict, + Iterable, + Iterator, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from .dc_validators import ( + deep_iterable, + deep_mapping, + in_, + instance_of, + is_callable, + optional, + validate_field, + validate_fields, +) + + +def check_extensions(_, __, value): + if not isinstance(value, Iterable): + raise TypeError(f"'enable_extensions' not iterable: {value}") + diff = set(value).difference( + [ + "amsmath", + "attrs_image", + "colon_fence", + "deflist", + "dollarmath", + "fieldlist", + "html_admonition", + "html_image", + "linkify", + "replacements", + "smartquotes", + "strikethrough", + "substitution", + "tasklist", + ] + ) + if diff: + raise ValueError(f"'enable_extensions' items not recognised: {diff}") + + +def check_sub_delimiters(_, __, value): + if (not isinstance(value, (tuple, list))) or len(value) != 2: + raise TypeError(f"myst_sub_delimiters is not a tuple of length 2: {value}") + for delim in value: + if (not isinstance(delim, str)) or len(delim) != 1: + raise TypeError( + f"myst_sub_delimiters does not contain strings of length 1: {value}" + ) + + +@dc.dataclass() +class MdParserConfig: + """Configuration options for the Markdown Parser. + + Note in the sphinx configuration these option names are prepended with ``myst_`` + """ + + # TODO replace commonmark_only, gfm_only with a single option + + commonmark_only: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Use strict CommonMark parser", + }, + ) + gfm_only: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Use strict Github Flavoured Markdown parser", + }, + ) + + enable_extensions: Sequence[str] = dc.field( + default_factory=list, + metadata={"validator": check_extensions, "help": "Enable syntax extensions"}, + ) + + disable_syntax: Iterable[str] = dc.field( + default_factory=list, + metadata={ + "validator": deep_iterable(instance_of(str), instance_of((list, tuple))), + "help": "Disable Commonmark syntax elements", + }, + ) + + all_links_external: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Parse all links as simple hyperlinks", + }, + ) + + # see https://en.wikipedia.org/wiki/List_of_URI_schemes + url_schemes: Optional[Iterable[str]] = dc.field( + default=cast(Optional[Iterable[str]], ("http", "https", "mailto", "ftp")), + metadata={ + "validator": optional( + deep_iterable(instance_of(str), instance_of((list, tuple))) + ), + "help": "URL scheme prefixes identified as external links", + }, + ) + + ref_domains: Optional[Iterable[str]] = dc.field( + default=None, + metadata={ + "validator": optional( + deep_iterable(instance_of(str), instance_of((list, tuple))) + ), + "help": "Sphinx domain names to search in for link references", + }, + ) + + highlight_code_blocks: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Syntax highlight code blocks with pygments", + "docutils_only": True, + }, + ) + + number_code_blocks: Sequence[str] = dc.field( + default_factory=list, + metadata={ + "validator": deep_iterable(instance_of(str), instance_of((list, tuple))), + "help": "Add line numbers to code blocks with these languages", + }, + ) + + title_to_header: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Convert a `title` field in the top-matter to a H1 header", + }, + ) + + heading_anchors: Optional[int] = dc.field( + default=None, + metadata={ + "validator": optional(in_([1, 2, 3, 4, 5, 6, 7])), + "help": "Heading level depth to assign HTML anchors", + }, + ) + + heading_slug_func: Optional[Callable[[str], str]] = dc.field( + default=None, + metadata={ + "validator": optional(is_callable), + "help": "Function for creating heading anchors", + "global_only": True, + }, + ) + + html_meta: Dict[str, str] = dc.field( + default_factory=dict, + repr=False, + metadata={ + "validator": deep_mapping( + instance_of(str), instance_of(str), instance_of(dict) + ), + "merge_topmatter": True, + "help": "HTML meta tags", + }, + ) + + footnote_transition: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Place a transition before any footnotes", + }, + ) + + words_per_minute: int = dc.field( + default=200, + metadata={ + "validator": instance_of(int), + "help": "For reading speed calculations", + }, + ) + + # Extension specific + + substitutions: Dict[str, Union[str, int, float]] = dc.field( + default_factory=dict, + repr=False, + metadata={ + "validator": deep_mapping( + instance_of(str), instance_of((str, int, float)), instance_of(dict) + ), + "merge_topmatter": True, + "help": "Substitutions mapping", + "extension": "substitutions", + }, + ) + + sub_delimiters: Tuple[str, str] = dc.field( + default=("{", "}"), + metadata={ + "validator": check_sub_delimiters, + "help": "Substitution delimiters", + "extension": "substitutions", + }, + ) + + linkify_fuzzy_links: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Recognise URLs without schema prefixes", + "extension": "linkify", + }, + ) + + dmath_allow_labels: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Parse `$$...$$ (label)`", + "extension": "dollarmath", + }, + ) + dmath_allow_space: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Allow initial/final spaces in `$ ... $`", + "extension": "dollarmath", + }, + ) + dmath_allow_digits: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Allow initial/final digits `1$ ...$2`", + "extension": "dollarmath", + }, + ) + dmath_double_inline: bool = dc.field( + default=False, + metadata={ + "validator": instance_of(bool), + "help": "Parse inline `$$ ... $$`", + "extension": "dollarmath", + }, + ) + + update_mathjax: bool = dc.field( + default=True, + metadata={ + "validator": instance_of(bool), + "help": "Update sphinx.ext.mathjax configuration to ignore `$` delimiters", + "extension": "dollarmath", + "global_only": True, + }, + ) + + mathjax_classes: str = dc.field( + default="tex2jax_process|mathjax_process|math|output_area", + metadata={ + "validator": instance_of(str), + "help": "MathJax classes to add to math HTML", + "extension": "dollarmath", + "global_only": True, + }, + ) + + def __post_init__(self): + validate_fields(self) + + def copy(self, **kwargs: Any) -> "MdParserConfig": + """Return a new object replacing specified fields with new values. + + Note: initiating the copy will also validate the new fields. + """ + return dc.replace(self, **kwargs) + + @classmethod + def get_fields(cls) -> Tuple[dc.Field, ...]: + """Return all attribute fields in this class.""" + return dc.fields(cls) + + def as_dict(self, dict_factory=dict) -> dict: + """Return a dictionary of field name -> value.""" + return dc.asdict(self, dict_factory=dict_factory) + + def as_triple(self) -> Iterable[Tuple[str, Any, dc.Field]]: + """Yield triples of (name, value, field).""" + fields = {f.name: f for f in dc.fields(self.__class__)} + for name, value in dc.asdict(self).items(): + yield name, value, fields[name] + + +def merge_file_level( + config: MdParserConfig, + topmatter: Dict[str, Any], + warning: Callable[[str, str], None], +) -> MdParserConfig: + """Merge the file-level topmatter with the global config. + + :param config: Global config. + :param topmatter: Topmatter from the file. + :param warning: Function to call with a warning (type, message). + :returns: A new config object + """ + # get updates + updates: Dict[str, Any] = {} + myst = topmatter.get("myst", {}) + if not isinstance(myst, dict): + warning("topmatter", f"'myst' key not a dict: {type(myst)}") + else: + updates = myst + + # allow html_meta and substitutions at top-level for back-compatibility + if "html_meta" in topmatter: + warning( + "topmatter", + "top-level 'html_meta' key is deprecated, " + "place under 'myst' key instead", + ) + updates["html_meta"] = topmatter["html_meta"] + if "substitutions" in topmatter: + warning( + "topmatter", + "top-level 'substitutions' key is deprecated, " + "place under 'myst' key instead", + ) + updates["substitutions"] = topmatter["substitutions"] + + new = config.copy() + + # validate each update + fields = {name: (value, field) for name, value, field in config.as_triple()} + for name, value in updates.items(): + + if name not in fields: + warning("topmatter", f"Unknown field: {name}") + continue + + old_value, field = fields[name] + + try: + validate_field(new, field, value) + except Exception as exc: + warning("topmatter", str(exc)) + continue + + if field.metadata.get("merge_topmatter"): + value = {**old_value, **value} + + setattr(new, name, value) + + return new + + +class TopmatterReadError(Exception): + """Topmatter parsing error.""" + + +def read_topmatter(text: Union[str, Iterator[str]]) -> Optional[Dict[str, Any]]: + """Read the (optional) YAML topmatter from a source string. + + This is identified by the first line starting with `---`, + then read up to a terminating line of `---`, or `...`. + + :param source: The source string to read from + :return: The topmatter + """ + import yaml + + if isinstance(text, str): + if not text.startswith("---"): # skip creating the line list in memory + return None + text = (line for line in text.splitlines()) + try: + if not next(text).startswith("---"): + return None + except StopIteration: + return None + top_matter = [] + for line in text: + if line.startswith("---") or line.startswith("..."): + break + top_matter.append(line.rstrip() + "\n") + try: + metadata = yaml.safe_load("".join(top_matter)) + assert isinstance(metadata, dict) + except (yaml.parser.ParserError, yaml.scanner.ScannerError) as err: + raise TopmatterReadError("Malformed YAML") from err + if not isinstance(metadata, dict): + raise TopmatterReadError(f"YAML is not a dict: {type(metadata)}") + return metadata |