summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:21:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-29 04:21:29 +0000
commit683f1f3ddde13390781318b9fe4a6841ea35d145 (patch)
treec21fddf99f2e5233bb3f22bbafd0d711fe7c5d95 /src
parentInitial commit. (diff)
downloadpython-tomli-683f1f3ddde13390781318b9fe4a6841ea35d145.tar.xz
python-tomli-683f1f3ddde13390781318b9fe4a6841ea35d145.zip
Adding upstream version 2.0.1.upstream/2.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src')
-rw-r--r--src/tomli/__init__.py11
-rw-r--r--src/tomli/_parser.py691
-rw-r--r--src/tomli/_re.py107
-rw-r--r--src/tomli/_types.py10
-rw-r--r--src/tomli/py.typed1
5 files changed, 820 insertions, 0 deletions
diff --git a/src/tomli/__init__.py b/src/tomli/__init__.py
new file mode 100644
index 0000000..4c6ec97
--- /dev/null
+++ b/src/tomli/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+__all__ = ("loads", "load", "TOMLDecodeError")
+__version__ = "2.0.1" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
+
+from ._parser import TOMLDecodeError, load, loads
+
+# Pretend this exception was created here.
+TOMLDecodeError.__module__ = __name__
diff --git a/src/tomli/_parser.py b/src/tomli/_parser.py
new file mode 100644
index 0000000..f1bb0aa
--- /dev/null
+++ b/src/tomli/_parser.py
@@ -0,0 +1,691 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+import string
+from types import MappingProxyType
+from typing import Any, BinaryIO, NamedTuple
+
+from ._re import (
+ RE_DATETIME,
+ RE_LOCALTIME,
+ RE_NUMBER,
+ match_to_datetime,
+ match_to_localtime,
+ match_to_number,
+)
+from ._types import Key, ParseFloat, Pos
+
+ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
+
+# Neither of these sets include quotation mark or backslash. They are
+# currently handled as separate cases in the parser functions.
+ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t")
+ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n")
+
+ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
+ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
+
+ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
+
+TOML_WS = frozenset(" \t")
+TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n")
+BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
+KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
+HEXDIGIT_CHARS = frozenset(string.hexdigits)
+
+BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
+ {
+ "\\b": "\u0008", # backspace
+ "\\t": "\u0009", # tab
+ "\\n": "\u000A", # linefeed
+ "\\f": "\u000C", # form feed
+ "\\r": "\u000D", # carriage return
+ '\\"': "\u0022", # quote
+ "\\\\": "\u005C", # backslash
+ }
+)
+
+
+class TOMLDecodeError(ValueError):
+ """An error raised if a document is not valid TOML."""
+
+
+def load(__fp: BinaryIO, *, parse_float: ParseFloat = float) -> dict[str, Any]:
+ """Parse TOML from a binary file object."""
+ b = __fp.read()
+ try:
+ s = b.decode()
+ except AttributeError:
+ raise TypeError(
+ "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
+ ) from None
+ return loads(s, parse_float=parse_float)
+
+
+def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]: # noqa: C901
+ """Parse TOML from a string."""
+
+ # The spec allows converting "\r\n" to "\n", even in string
+ # literals. Let's do so to simplify parsing.
+ src = __s.replace("\r\n", "\n")
+ pos = 0
+ out = Output(NestedDict(), Flags())
+ header: Key = ()
+ parse_float = make_safe_parse_float(parse_float)
+
+ # Parse one statement at a time
+ # (typically means one line in TOML source)
+ while True:
+ # 1. Skip line leading whitespace
+ pos = skip_chars(src, pos, TOML_WS)
+
+ # 2. Parse rules. Expect one of the following:
+ # - end of file
+ # - end of line
+ # - comment
+ # - key/value pair
+ # - append dict to list (and move to its namespace)
+ # - create dict (and move to its namespace)
+ # Skip trailing whitespace when applicable.
+ try:
+ char = src[pos]
+ except IndexError:
+ break
+ if char == "\n":
+ pos += 1
+ continue
+ if char in KEY_INITIAL_CHARS:
+ pos = key_value_rule(src, pos, out, header, parse_float)
+ pos = skip_chars(src, pos, TOML_WS)
+ elif char == "[":
+ try:
+ second_char: str | None = src[pos + 1]
+ except IndexError:
+ second_char = None
+ out.flags.finalize_pending()
+ if second_char == "[":
+ pos, header = create_list_rule(src, pos, out)
+ else:
+ pos, header = create_dict_rule(src, pos, out)
+ pos = skip_chars(src, pos, TOML_WS)
+ elif char != "#":
+ raise suffixed_err(src, pos, "Invalid statement")
+
+ # 3. Skip comment
+ pos = skip_comment(src, pos)
+
+ # 4. Expect end of line or end of file
+ try:
+ char = src[pos]
+ except IndexError:
+ break
+ if char != "\n":
+ raise suffixed_err(
+ src, pos, "Expected newline or end of document after a statement"
+ )
+ pos += 1
+
+ return out.data.dict
+
+
+class Flags:
+ """Flags that map to parsed keys/namespaces."""
+
+ # Marks an immutable namespace (inline array or inline table).
+ FROZEN = 0
+ # Marks a nest that has been explicitly created and can no longer
+ # be opened using the "[table]" syntax.
+ EXPLICIT_NEST = 1
+
+ def __init__(self) -> None:
+ self._flags: dict[str, dict] = {}
+ self._pending_flags: set[tuple[Key, int]] = set()
+
+ def add_pending(self, key: Key, flag: int) -> None:
+ self._pending_flags.add((key, flag))
+
+ def finalize_pending(self) -> None:
+ for key, flag in self._pending_flags:
+ self.set(key, flag, recursive=False)
+ self._pending_flags.clear()
+
+ def unset_all(self, key: Key) -> None:
+ cont = self._flags
+ for k in key[:-1]:
+ if k not in cont:
+ return
+ cont = cont[k]["nested"]
+ cont.pop(key[-1], None)
+
+ def set(self, key: Key, flag: int, *, recursive: bool) -> None: # noqa: A003
+ cont = self._flags
+ key_parent, key_stem = key[:-1], key[-1]
+ for k in key_parent:
+ if k not in cont:
+ cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
+ cont = cont[k]["nested"]
+ if key_stem not in cont:
+ cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
+ cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)
+
+ def is_(self, key: Key, flag: int) -> bool:
+ if not key:
+ return False # document root has no flags
+ cont = self._flags
+ for k in key[:-1]:
+ if k not in cont:
+ return False
+ inner_cont = cont[k]
+ if flag in inner_cont["recursive_flags"]:
+ return True
+ cont = inner_cont["nested"]
+ key_stem = key[-1]
+ if key_stem in cont:
+ cont = cont[key_stem]
+ return flag in cont["flags"] or flag in cont["recursive_flags"]
+ return False
+
+
+class NestedDict:
+ def __init__(self) -> None:
+ # The parsed content of the TOML document
+ self.dict: dict[str, Any] = {}
+
+ def get_or_create_nest(
+ self,
+ key: Key,
+ *,
+ access_lists: bool = True,
+ ) -> dict:
+ cont: Any = self.dict
+ for k in key:
+ if k not in cont:
+ cont[k] = {}
+ cont = cont[k]
+ if access_lists and isinstance(cont, list):
+ cont = cont[-1]
+ if not isinstance(cont, dict):
+ raise KeyError("There is no nest behind this key")
+ return cont
+
+ def append_nest_to_list(self, key: Key) -> None:
+ cont = self.get_or_create_nest(key[:-1])
+ last_key = key[-1]
+ if last_key in cont:
+ list_ = cont[last_key]
+ if not isinstance(list_, list):
+ raise KeyError("An object other than list found behind this key")
+ list_.append({})
+ else:
+ cont[last_key] = [{}]
+
+
+class Output(NamedTuple):
+ data: NestedDict
+ flags: Flags
+
+
+def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
+ try:
+ while src[pos] in chars:
+ pos += 1
+ except IndexError:
+ pass
+ return pos
+
+
+def skip_until(
+ src: str,
+ pos: Pos,
+ expect: str,
+ *,
+ error_on: frozenset[str],
+ error_on_eof: bool,
+) -> Pos:
+ try:
+ new_pos = src.index(expect, pos)
+ except ValueError:
+ new_pos = len(src)
+ if error_on_eof:
+ raise suffixed_err(src, new_pos, f"Expected {expect!r}") from None
+
+ if not error_on.isdisjoint(src[pos:new_pos]):
+ while src[pos] not in error_on:
+ pos += 1
+ raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}")
+ return new_pos
+
+
+def skip_comment(src: str, pos: Pos) -> Pos:
+ try:
+ char: str | None = src[pos]
+ except IndexError:
+ char = None
+ if char == "#":
+ return skip_until(
+ src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
+ )
+ return pos
+
+
+def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
+ while True:
+ pos_before_skip = pos
+ pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
+ pos = skip_comment(src, pos)
+ if pos == pos_before_skip:
+ return pos
+
+
+def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
+ pos += 1 # Skip "["
+ pos = skip_chars(src, pos, TOML_WS)
+ pos, key = parse_key(src, pos)
+
+ if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN):
+ raise suffixed_err(src, pos, f"Cannot declare {key} twice")
+ out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
+ try:
+ out.data.get_or_create_nest(key)
+ except KeyError:
+ raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+
+ if not src.startswith("]", pos):
+ raise suffixed_err(src, pos, "Expected ']' at the end of a table declaration")
+ return pos + 1, key
+
+
+def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
+ pos += 2 # Skip "[["
+ pos = skip_chars(src, pos, TOML_WS)
+ pos, key = parse_key(src, pos)
+
+ if out.flags.is_(key, Flags.FROZEN):
+ raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}")
+ # Free the namespace now that it points to another empty list item...
+ out.flags.unset_all(key)
+ # ...but this key precisely is still prohibited from table declaration
+ out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
+ try:
+ out.data.append_nest_to_list(key)
+ except KeyError:
+ raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+
+ if not src.startswith("]]", pos):
+ raise suffixed_err(src, pos, "Expected ']]' at the end of an array declaration")
+ return pos + 2, key
+
+
+def key_value_rule(
+ src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
+) -> Pos:
+ pos, key, value = parse_key_value_pair(src, pos, parse_float)
+ key_parent, key_stem = key[:-1], key[-1]
+ abs_key_parent = header + key_parent
+
+ relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
+ for cont_key in relative_path_cont_keys:
+ # Check that dotted key syntax does not redefine an existing table
+ if out.flags.is_(cont_key, Flags.EXPLICIT_NEST):
+ raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}")
+ # Containers in the relative path can't be opened with the table syntax or
+ # dotted key/value syntax in following table sections.
+ out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST)
+
+ if out.flags.is_(abs_key_parent, Flags.FROZEN):
+ raise suffixed_err(
+ src, pos, f"Cannot mutate immutable namespace {abs_key_parent}"
+ )
+
+ try:
+ nest = out.data.get_or_create_nest(abs_key_parent)
+ except KeyError:
+ raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+ if key_stem in nest:
+ raise suffixed_err(src, pos, "Cannot overwrite a value")
+ # Mark inline table and array namespaces recursively immutable
+ if isinstance(value, (dict, list)):
+ out.flags.set(header + key, Flags.FROZEN, recursive=True)
+ nest[key_stem] = value
+ return pos
+
+
+def parse_key_value_pair(
+ src: str, pos: Pos, parse_float: ParseFloat
+) -> tuple[Pos, Key, Any]:
+ pos, key = parse_key(src, pos)
+ try:
+ char: str | None = src[pos]
+ except IndexError:
+ char = None
+ if char != "=":
+ raise suffixed_err(src, pos, "Expected '=' after a key in a key/value pair")
+ pos += 1
+ pos = skip_chars(src, pos, TOML_WS)
+ pos, value = parse_value(src, pos, parse_float)
+ return pos, key, value
+
+
+def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
+ pos, key_part = parse_key_part(src, pos)
+ key: Key = (key_part,)
+ pos = skip_chars(src, pos, TOML_WS)
+ while True:
+ try:
+ char: str | None = src[pos]
+ except IndexError:
+ char = None
+ if char != ".":
+ return pos, key
+ pos += 1
+ pos = skip_chars(src, pos, TOML_WS)
+ pos, key_part = parse_key_part(src, pos)
+ key += (key_part,)
+ pos = skip_chars(src, pos, TOML_WS)
+
+
+def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
+ try:
+ char: str | None = src[pos]
+ except IndexError:
+ char = None
+ if char in BARE_KEY_CHARS:
+ start_pos = pos
+ pos = skip_chars(src, pos, BARE_KEY_CHARS)
+ return pos, src[start_pos:pos]
+ if char == "'":
+ return parse_literal_str(src, pos)
+ if char == '"':
+ return parse_one_line_basic_str(src, pos)
+ raise suffixed_err(src, pos, "Invalid initial character for a key part")
+
+
+def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
+ pos += 1
+ return parse_basic_str(src, pos, multiline=False)
+
+
+def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]:
+ pos += 1
+ array: list = []
+
+ pos = skip_comments_and_array_ws(src, pos)
+ if src.startswith("]", pos):
+ return pos + 1, array
+ while True:
+ pos, val = parse_value(src, pos, parse_float)
+ array.append(val)
+ pos = skip_comments_and_array_ws(src, pos)
+
+ c = src[pos : pos + 1]
+ if c == "]":
+ return pos + 1, array
+ if c != ",":
+ raise suffixed_err(src, pos, "Unclosed array")
+ pos += 1
+
+ pos = skip_comments_and_array_ws(src, pos)
+ if src.startswith("]", pos):
+ return pos + 1, array
+
+
+def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]:
+ pos += 1
+ nested_dict = NestedDict()
+ flags = Flags()
+
+ pos = skip_chars(src, pos, TOML_WS)
+ if src.startswith("}", pos):
+ return pos + 1, nested_dict.dict
+ while True:
+ pos, key, value = parse_key_value_pair(src, pos, parse_float)
+ key_parent, key_stem = key[:-1], key[-1]
+ if flags.is_(key, Flags.FROZEN):
+ raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}")
+ try:
+ nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
+ except KeyError:
+ raise suffixed_err(src, pos, "Cannot overwrite a value") from None
+ if key_stem in nest:
+ raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}")
+ nest[key_stem] = value
+ pos = skip_chars(src, pos, TOML_WS)
+ c = src[pos : pos + 1]
+ if c == "}":
+ return pos + 1, nested_dict.dict
+ if c != ",":
+ raise suffixed_err(src, pos, "Unclosed inline table")
+ if isinstance(value, (dict, list)):
+ flags.set(key, Flags.FROZEN, recursive=True)
+ pos += 1
+ pos = skip_chars(src, pos, TOML_WS)
+
+
+def parse_basic_str_escape(
+ src: str, pos: Pos, *, multiline: bool = False
+) -> tuple[Pos, str]:
+ escape_id = src[pos : pos + 2]
+ pos += 2
+ if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
+ # Skip whitespace until next non-whitespace character or end of
+ # the doc. Error if non-whitespace is found before newline.
+ if escape_id != "\\\n":
+ pos = skip_chars(src, pos, TOML_WS)
+ try:
+ char = src[pos]
+ except IndexError:
+ return pos, ""
+ if char != "\n":
+ raise suffixed_err(src, pos, "Unescaped '\\' in a string")
+ pos += 1
+ pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
+ return pos, ""
+ if escape_id == "\\u":
+ return parse_hex_char(src, pos, 4)
+ if escape_id == "\\U":
+ return parse_hex_char(src, pos, 8)
+ try:
+ return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
+ except KeyError:
+ raise suffixed_err(src, pos, "Unescaped '\\' in a string") from None
+
+
+def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
+ return parse_basic_str_escape(src, pos, multiline=True)
+
+
+def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
+ hex_str = src[pos : pos + hex_len]
+ if len(hex_str) != hex_len or not HEXDIGIT_CHARS.issuperset(hex_str):
+ raise suffixed_err(src, pos, "Invalid hex value")
+ pos += hex_len
+ hex_int = int(hex_str, 16)
+ if not is_unicode_scalar_value(hex_int):
+ raise suffixed_err(src, pos, "Escaped character is not a Unicode scalar value")
+ return pos, chr(hex_int)
+
+
+def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
+ pos += 1 # Skip starting apostrophe
+ start_pos = pos
+ pos = skip_until(
+ src, pos, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
+ )
+ return pos + 1, src[start_pos:pos] # Skip ending apostrophe
+
+
+def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
+ pos += 3
+ if src.startswith("\n", pos):
+ pos += 1
+
+ if literal:
+ delim = "'"
+ end_pos = skip_until(
+ src,
+ pos,
+ "'''",
+ error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
+ error_on_eof=True,
+ )
+ result = src[pos:end_pos]
+ pos = end_pos + 3
+ else:
+ delim = '"'
+ pos, result = parse_basic_str(src, pos, multiline=True)
+
+ # Add at maximum two extra apostrophes/quotes if the end sequence
+ # is 4 or 5 chars long instead of just 3.
+ if not src.startswith(delim, pos):
+ return pos, result
+ pos += 1
+ if not src.startswith(delim, pos):
+ return pos, result + delim
+ pos += 1
+ return pos, result + (delim * 2)
+
+
+def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
+ if multiline:
+ error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
+ parse_escapes = parse_basic_str_escape_multiline
+ else:
+ error_on = ILLEGAL_BASIC_STR_CHARS
+ parse_escapes = parse_basic_str_escape
+ result = ""
+ start_pos = pos
+ while True:
+ try:
+ char = src[pos]
+ except IndexError:
+ raise suffixed_err(src, pos, "Unterminated string") from None
+ if char == '"':
+ if not multiline:
+ return pos + 1, result + src[start_pos:pos]
+ if src.startswith('"""', pos):
+ return pos + 3, result + src[start_pos:pos]
+ pos += 1
+ continue
+ if char == "\\":
+ result += src[start_pos:pos]
+ pos, parsed_escape = parse_escapes(src, pos)
+ result += parsed_escape
+ start_pos = pos
+ continue
+ if char in error_on:
+ raise suffixed_err(src, pos, f"Illegal character {char!r}")
+ pos += 1
+
+
+def parse_value( # noqa: C901
+ src: str, pos: Pos, parse_float: ParseFloat
+) -> tuple[Pos, Any]:
+ try:
+ char: str | None = src[pos]
+ except IndexError:
+ char = None
+
+ # IMPORTANT: order conditions based on speed of checking and likelihood
+
+ # Basic strings
+ if char == '"':
+ if src.startswith('"""', pos):
+ return parse_multiline_str(src, pos, literal=False)
+ return parse_one_line_basic_str(src, pos)
+
+ # Literal strings
+ if char == "'":
+ if src.startswith("'''", pos):
+ return parse_multiline_str(src, pos, literal=True)
+ return parse_literal_str(src, pos)
+
+ # Booleans
+ if char == "t":
+ if src.startswith("true", pos):
+ return pos + 4, True
+ if char == "f":
+ if src.startswith("false", pos):
+ return pos + 5, False
+
+ # Arrays
+ if char == "[":
+ return parse_array(src, pos, parse_float)
+
+ # Inline tables
+ if char == "{":
+ return parse_inline_table(src, pos, parse_float)
+
+ # Dates and times
+ datetime_match = RE_DATETIME.match(src, pos)
+ if datetime_match:
+ try:
+ datetime_obj = match_to_datetime(datetime_match)
+ except ValueError as e:
+ raise suffixed_err(src, pos, "Invalid date or datetime") from e
+ return datetime_match.end(), datetime_obj
+ localtime_match = RE_LOCALTIME.match(src, pos)
+ if localtime_match:
+ return localtime_match.end(), match_to_localtime(localtime_match)
+
+ # Integers and "normal" floats.
+ # The regex will greedily match any type starting with a decimal
+ # char, so needs to be located after handling of dates and times.
+ number_match = RE_NUMBER.match(src, pos)
+ if number_match:
+ return number_match.end(), match_to_number(number_match, parse_float)
+
+ # Special floats
+ first_three = src[pos : pos + 3]
+ if first_three in {"inf", "nan"}:
+ return pos + 3, parse_float(first_three)
+ first_four = src[pos : pos + 4]
+ if first_four in {"-inf", "+inf", "-nan", "+nan"}:
+ return pos + 4, parse_float(first_four)
+
+ raise suffixed_err(src, pos, "Invalid value")
+
+
+def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
+ """Return a `TOMLDecodeError` where error message is suffixed with
+ coordinates in source."""
+
+ def coord_repr(src: str, pos: Pos) -> str:
+ if pos >= len(src):
+ return "end of document"
+ line = src.count("\n", 0, pos) + 1
+ if line == 1:
+ column = pos + 1
+ else:
+ column = pos - src.rindex("\n", 0, pos)
+ return f"line {line}, column {column}"
+
+ return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})")
+
+
+def is_unicode_scalar_value(codepoint: int) -> bool:
+ return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
+
+
+def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
+ """A decorator to make `parse_float` safe.
+
+ `parse_float` must not return dicts or lists, because these types
+ would be mixed with parsed TOML tables and arrays, thus confusing
+ the parser. The returned decorated callable raises `ValueError`
+ instead of returning illegal types.
+ """
+ # The default `float` callable never returns illegal types. Optimize it.
+ if parse_float is float: # type: ignore[comparison-overlap]
+ return float
+
+ def safe_parse_float(float_str: str) -> Any:
+ float_value = parse_float(float_str)
+ if isinstance(float_value, (dict, list)):
+ raise ValueError("parse_float must not return dicts or lists")
+ return float_value
+
+ return safe_parse_float
diff --git a/src/tomli/_re.py b/src/tomli/_re.py
new file mode 100644
index 0000000..994bb74
--- /dev/null
+++ b/src/tomli/_re.py
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+from __future__ import annotations
+
+from datetime import date, datetime, time, timedelta, timezone, tzinfo
+from functools import lru_cache
+import re
+from typing import Any
+
+from ._types import ParseFloat
+
+# E.g.
+# - 00:32:00.999999
+# - 00:32:00
+_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?"
+
+RE_NUMBER = re.compile(
+ r"""
+0
+(?:
+ x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
+ |
+ b[01](?:_?[01])* # bin
+ |
+ o[0-7](?:_?[0-7])* # oct
+)
+|
+[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
+(?P<floatpart>
+ (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
+ (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
+)
+""",
+ flags=re.VERBOSE,
+)
+RE_LOCALTIME = re.compile(_TIME_RE_STR)
+RE_DATETIME = re.compile(
+ rf"""
+([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
+(?:
+ [Tt ]
+ {_TIME_RE_STR}
+ (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
+)?
+""",
+ flags=re.VERBOSE,
+)
+
+
+def match_to_datetime(match: re.Match) -> datetime | date:
+ """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
+
+ Raises ValueError if the match does not correspond to a valid date
+ or datetime.
+ """
+ (
+ year_str,
+ month_str,
+ day_str,
+ hour_str,
+ minute_str,
+ sec_str,
+ micros_str,
+ zulu_time,
+ offset_sign_str,
+ offset_hour_str,
+ offset_minute_str,
+ ) = match.groups()
+ year, month, day = int(year_str), int(month_str), int(day_str)
+ if hour_str is None:
+ return date(year, month, day)
+ hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
+ micros = int(micros_str.ljust(6, "0")) if micros_str else 0
+ if offset_sign_str:
+ tz: tzinfo | None = cached_tz(
+ offset_hour_str, offset_minute_str, offset_sign_str
+ )
+ elif zulu_time:
+ tz = timezone.utc
+ else: # local date-time
+ tz = None
+ return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
+
+
+@lru_cache(maxsize=None)
+def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
+ sign = 1 if sign_str == "+" else -1
+ return timezone(
+ timedelta(
+ hours=sign * int(hour_str),
+ minutes=sign * int(minute_str),
+ )
+ )
+
+
+def match_to_localtime(match: re.Match) -> time:
+ hour_str, minute_str, sec_str, micros_str = match.groups()
+ micros = int(micros_str.ljust(6, "0")) if micros_str else 0
+ return time(int(hour_str), int(minute_str), int(sec_str), micros)
+
+
+def match_to_number(match: re.Match, parse_float: ParseFloat) -> Any:
+ if match.group("floatpart"):
+ return parse_float(match.group())
+ return int(match.group(), 0)
diff --git a/src/tomli/_types.py b/src/tomli/_types.py
new file mode 100644
index 0000000..d949412
--- /dev/null
+++ b/src/tomli/_types.py
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
+# Licensed to PSF under a Contributor Agreement.
+
+from typing import Any, Callable, Tuple
+
+# Type annotations
+ParseFloat = Callable[[str], Any]
+Key = Tuple[str, ...]
+Pos = int
diff --git a/src/tomli/py.typed b/src/tomli/py.typed
new file mode 100644
index 0000000..7632ecf
--- /dev/null
+++ b/src/tomli/py.typed
@@ -0,0 +1 @@
+# Marker file for PEP 561