From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Fri, 19 Apr 2024 02:47:55 +0200
Subject: Adding upstream version 124.0.1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 .../pkg_resources/_vendor/packaging/metadata.py    | 408 +++++++++++++++++++++
 1 file changed, 408 insertions(+)
 create mode 100644 third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py

(limited to 'third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py')

diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py
new file mode 100644
index 0000000000..e76a60c395
--- /dev/null
+++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py
@@ -0,0 +1,408 @@
+import email.feedparser
+import email.header
+import email.message
+import email.parser
+import email.policy
+import sys
+import typing
+from typing import Dict, List, Optional, Tuple, Union, cast
+
+if sys.version_info >= (3, 8):  # pragma: no cover
+    from typing import TypedDict
+else:  # pragma: no cover
+    if typing.TYPE_CHECKING:
+        from typing_extensions import TypedDict
+    else:
+        try:
+            from typing_extensions import TypedDict
+        except ImportError:
+
+            class TypedDict:
+                def __init_subclass__(*_args, **_kwargs):
+                    pass
+
+
+# The RawMetadata class attempts to make as few assumptions about the underlying
+# serialization formats as possible. The idea is that as long as a serialization
+# formats offer some very basic primitives in *some* way then we can support
+# serializing to and from that format.
+class RawMetadata(TypedDict, total=False):
+    """A dictionary of raw core metadata.
+
+    Each field in core metadata maps to a key of this dictionary (when data is
+    provided). The key is lower-case and underscores are used instead of dashes
+    compared to the equivalent core metadata field. Any core metadata field that
+    can be specified multiple times or can hold multiple values in a single
+    field have a key with a plural name.
+
+    Core metadata fields that can be specified multiple times are stored as a
+    list or dict depending on which is appropriate for the field. Any fields
+    which hold multiple values in a single field are stored as a list.
+
+    """
+
+    # Metadata 1.0 - PEP 241
+    metadata_version: str
+    name: str
+    version: str
+    platforms: List[str]
+    summary: str
+    description: str
+    keywords: List[str]
+    home_page: str
+    author: str
+    author_email: str
+    license: str
+
+    # Metadata 1.1 - PEP 314
+    supported_platforms: List[str]
+    download_url: str
+    classifiers: List[str]
+    requires: List[str]
+    provides: List[str]
+    obsoletes: List[str]
+
+    # Metadata 1.2 - PEP 345
+    maintainer: str
+    maintainer_email: str
+    requires_dist: List[str]
+    provides_dist: List[str]
+    obsoletes_dist: List[str]
+    requires_python: str
+    requires_external: List[str]
+    project_urls: Dict[str, str]
+
+    # Metadata 2.0
+    # PEP 426 attempted to completely revamp the metadata format
+    # but got stuck without ever being able to build consensus on
+    # it and ultimately ended up withdrawn.
+    #
+    # However, a number of tools had started emiting METADATA with
+    # `2.0` Metadata-Version, so for historical reasons, this version
+    # was skipped.
+
+    # Metadata 2.1 - PEP 566
+    description_content_type: str
+    provides_extra: List[str]
+
+    # Metadata 2.2 - PEP 643
+    dynamic: List[str]
+
+    # Metadata 2.3 - PEP 685
+    # No new fields were added in PEP 685, just some edge case were
+    # tightened up to provide better interoptability.
+
+
+_STRING_FIELDS = {
+    "author",
+    "author_email",
+    "description",
+    "description_content_type",
+    "download_url",
+    "home_page",
+    "license",
+    "maintainer",
+    "maintainer_email",
+    "metadata_version",
+    "name",
+    "requires_python",
+    "summary",
+    "version",
+}
+
+_LIST_STRING_FIELDS = {
+    "classifiers",
+    "dynamic",
+    "obsoletes",
+    "obsoletes_dist",
+    "platforms",
+    "provides",
+    "provides_dist",
+    "provides_extra",
+    "requires",
+    "requires_dist",
+    "requires_external",
+    "supported_platforms",
+}
+
+
+def _parse_keywords(data: str) -> List[str]:
+    """Split a string of comma-separate keyboards into a list of keywords."""
+    return [k.strip() for k in data.split(",")]
+
+
+def _parse_project_urls(data: List[str]) -> Dict[str, str]:
+    """Parse a list of label/URL string pairings separated by a comma."""
+    urls = {}
+    for pair in data:
+        # Our logic is slightly tricky here as we want to try and do
+        # *something* reasonable with malformed data.
+        #
+        # The main thing that we have to worry about, is data that does
+        # not have a ',' at all to split the label from the Value. There
+        # isn't a singular right answer here, and we will fail validation
+        # later on (if the caller is validating) so it doesn't *really*
+        # matter, but since the missing value has to be an empty str
+        # and our return value is dict[str, str], if we let the key
+        # be the missing value, then they'd have multiple '' values that
+        # overwrite each other in a accumulating dict.
+        #
+        # The other potentional issue is that it's possible to have the
+        # same label multiple times in the metadata, with no solid "right"
+        # answer with what to do in that case. As such, we'll do the only
+        # thing we can, which is treat the field as unparseable and add it
+        # to our list of unparsed fields.
+        parts = [p.strip() for p in pair.split(",", 1)]
+        parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items
+
+        # TODO: The spec doesn't say anything about if the keys should be
+        #       considered case sensitive or not... logically they should
+        #       be case-preserving and case-insensitive, but doing that
+        #       would open up more cases where we might have duplicate
+        #       entries.
+        label, url = parts
+        if label in urls:
+            # The label already exists in our set of urls, so this field
+            # is unparseable, and we can just add the whole thing to our
+            # unparseable data and stop processing it.
+            raise KeyError("duplicate labels in project urls")
+        urls[label] = url
+
+    return urls
+
+
+def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
+    """Get the body of the message."""
+    # If our source is a str, then our caller has managed encodings for us,
+    # and we don't need to deal with it.
+    if isinstance(source, str):
+        payload: str = msg.get_payload()
+        return payload
+    # If our source is a bytes, then we're managing the encoding and we need
+    # to deal with it.
+    else:
+        bpayload: bytes = msg.get_payload(decode=True)
+        try:
+            return bpayload.decode("utf8", "strict")
+        except UnicodeDecodeError:
+            raise ValueError("payload in an invalid encoding")
+
+
+# The various parse_FORMAT functions here are intended to be as lenient as
+# possible in their parsing, while still returning a correctly typed
+# RawMetadata.
+#
+# To aid in this, we also generally want to do as little touching of the
+# data as possible, except where there are possibly some historic holdovers
+# that make valid data awkward to work with.
+#
+# While this is a lower level, intermediate format than our ``Metadata``
+# class, some light touch ups can make a massive difference in usability.
+
+# Map METADATA fields to RawMetadata.
+_EMAIL_TO_RAW_MAPPING = {
+    "author": "author",
+    "author-email": "author_email",
+    "classifier": "classifiers",
+    "description": "description",
+    "description-content-type": "description_content_type",
+    "download-url": "download_url",
+    "dynamic": "dynamic",
+    "home-page": "home_page",
+    "keywords": "keywords",
+    "license": "license",
+    "maintainer": "maintainer",
+    "maintainer-email": "maintainer_email",
+    "metadata-version": "metadata_version",
+    "name": "name",
+    "obsoletes": "obsoletes",
+    "obsoletes-dist": "obsoletes_dist",
+    "platform": "platforms",
+    "project-url": "project_urls",
+    "provides": "provides",
+    "provides-dist": "provides_dist",
+    "provides-extra": "provides_extra",
+    "requires": "requires",
+    "requires-dist": "requires_dist",
+    "requires-external": "requires_external",
+    "requires-python": "requires_python",
+    "summary": "summary",
+    "supported-platform": "supported_platforms",
+    "version": "version",
+}
+
+
+def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
+    """Parse a distribution's metadata.
+
+    This function returns a two-item tuple of dicts. The first dict is of
+    recognized fields from the core metadata specification. Fields that can be
+    parsed and translated into Python's built-in types are converted
+    appropriately. All other fields are left as-is. Fields that are allowed to
+    appear multiple times are stored as lists.
+
+    The second dict contains all other fields from the metadata. This includes
+    any unrecognized fields. It also includes any fields which are expected to
+    be parsed into a built-in type but were not formatted appropriately. Finally,
+    any fields that are expected to appear only once but are repeated are
+    included in this dict.
+
+    """
+    raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
+    unparsed: Dict[str, List[str]] = {}
+
+    if isinstance(data, str):
+        parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
+    else:
+        parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
+
+    # We have to wrap parsed.keys() in a set, because in the case of multiple
+    # values for a key (a list), the key will appear multiple times in the
+    # list of keys, but we're avoiding that by using get_all().
+    for name in frozenset(parsed.keys()):
+        # Header names in RFC are case insensitive, so we'll normalize to all
+        # lower case to make comparisons easier.
+        name = name.lower()
+
+        # We use get_all() here, even for fields that aren't multiple use,
+        # because otherwise someone could have e.g. two Name fields, and we
+        # would just silently ignore it rather than doing something about it.
+        headers = parsed.get_all(name)
+
+        # The way the email module works when parsing bytes is that it
+        # unconditionally decodes the bytes as ascii using the surrogateescape
+        # handler. When you pull that data back out (such as with get_all() ),
+        # it looks to see if the str has any surrogate escapes, and if it does
+        # it wraps it in a Header object instead of returning the string.
+        #
+        # As such, we'll look for those Header objects, and fix up the encoding.
+        value = []
+        # Flag if we have run into any issues processing the headers, thus
+        # signalling that the data belongs in 'unparsed'.
+        valid_encoding = True
+        for h in headers:
+            # It's unclear if this can return more types than just a Header or
+            # a str, so we'll just assert here to make sure.
+            assert isinstance(h, (email.header.Header, str))
+
+            # If it's a header object, we need to do our little dance to get
+            # the real data out of it. In cases where there is invalid data
+            # we're going to end up with mojibake, but there's no obvious, good
+            # way around that without reimplementing parts of the Header object
+            # ourselves.
+            #
+            # That should be fine since, if mojibacked happens, this key is
+            # going into the unparsed dict anyways.
+            if isinstance(h, email.header.Header):
+                # The Header object stores it's data as chunks, and each chunk
+                # can be independently encoded, so we'll need to check each
+                # of them.
+                chunks: List[Tuple[bytes, Optional[str]]] = []
+                for bin, encoding in email.header.decode_header(h):
+                    try:
+                        bin.decode("utf8", "strict")
+                    except UnicodeDecodeError:
+                        # Enable mojibake.
+                        encoding = "latin1"
+                        valid_encoding = False
+                    else:
+                        encoding = "utf8"
+                    chunks.append((bin, encoding))
+
+                # Turn our chunks back into a Header object, then let that
+                # Header object do the right thing to turn them into a
+                # string for us.
+                value.append(str(email.header.make_header(chunks)))
+            # This is already a string, so just add it.
+            else:
+                value.append(h)
+
+        # We've processed all of our values to get them into a list of str,
+        # but we may have mojibake data, in which case this is an unparsed
+        # field.
+        if not valid_encoding:
+            unparsed[name] = value
+            continue
+
+        raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
+        if raw_name is None:
+            # This is a bit of a weird situation, we've encountered a key that
+            # we don't know what it means, so we don't know whether it's meant
+            # to be a list or not.
+            #
+            # Since we can't really tell one way or another, we'll just leave it
+            # as a list, even though it may be a single item list, because that's
+            # what makes the most sense for email headers.
+            unparsed[name] = value
+            continue
+
+        # If this is one of our string fields, then we'll check to see if our
+        # value is a list of a single item. If it is then we'll assume that
+        # it was emitted as a single string, and unwrap the str from inside
+        # the list.
+        #
+        # If it's any other kind of data, then we haven't the faintest clue
+        # what we should parse it as, and we have to just add it to our list
+        # of unparsed stuff.
+        if raw_name in _STRING_FIELDS and len(value) == 1:
+            raw[raw_name] = value[0]
+        # If this is one of our list of string fields, then we can just assign
+        # the value, since email *only* has strings, and our get_all() call
+        # above ensures that this is a list.
+        elif raw_name in _LIST_STRING_FIELDS:
+            raw[raw_name] = value
+        # Special Case: Keywords
+        # The keywords field is implemented in the metadata spec as a str,
+        # but it conceptually is a list of strings, and is serialized using
+        # ", ".join(keywords), so we'll do some light data massaging to turn
+        # this into what it logically is.
+        elif raw_name == "keywords" and len(value) == 1:
+            raw[raw_name] = _parse_keywords(value[0])
+        # Special Case: Project-URL
+        # The project urls is implemented in the metadata spec as a list of
+        # specially-formatted strings that represent a key and a value, which
+        # is fundamentally a mapping, however the email format doesn't support
+        # mappings in a sane way, so it was crammed into a list of strings
+        # instead.
+        #
+        # We will do a little light data massaging to turn this into a map as
+        # it logically should be.
+        elif raw_name == "project_urls":
+            try:
+                raw[raw_name] = _parse_project_urls(value)
+            except KeyError:
+                unparsed[name] = value
+        # Nothing that we've done has managed to parse this, so it'll just
+        # throw it in our unparseable data and move on.
+        else:
+            unparsed[name] = value
+
+    # We need to support getting the Description from the message payload in
+    # addition to getting it from the the headers. This does mean, though, there
+    # is the possibility of it being set both ways, in which case we put both
+    # in 'unparsed' since we don't know which is right.
+    try:
+        payload = _get_payload(parsed, data)
+    except ValueError:
+        unparsed.setdefault("description", []).append(
+            parsed.get_payload(decode=isinstance(data, bytes))
+        )
+    else:
+        if payload:
+            # Check to see if we've already got a description, if so then both
+            # it, and this body move to unparseable.
+            if "description" in raw:
+                description_header = cast(str, raw.pop("description"))
+                unparsed.setdefault("description", []).extend(
+                    [description_header, payload]
+                )
+            elif "description" in unparsed:
+                unparsed["description"].append(payload)
+            else:
+                raw["description"] = payload
+
+    # We need to cast our `raw` to a metadata, because a TypedDict only support
+    # literal key names, but we're computing our key names on purpose, but the
+    # way this function is implemented, our `TypedDict` can only have valid key
+    # names.
+    return cast(RawMetadata, raw), unparsed
-- 
cgit v1.2.3