diff options
Diffstat (limited to 'third_party/python/setuptools/pkg_resources')
47 files changed, 14655 insertions, 8287 deletions
diff --git a/third_party/python/setuptools/pkg_resources/__init__.py b/third_party/python/setuptools/pkg_resources/__init__.py index 99b7f68075..3fc97af475 100644 --- a/third_party/python/setuptools/pkg_resources/__init__.py +++ b/third_party/python/setuptools/pkg_resources/__init__.py @@ -12,6 +12,9 @@ The package resource API is designed to work with normal filesystem packages, .egg files, and unpacked .egg files. It can also work in a limited way with .zip files and with custom PEP 302 loaders that support the ``get_data()`` method. + +This module is deprecated. Users are directed to :mod:`importlib.resources`, +:mod:`importlib.metadata` and :pypi:`packaging` instead. """ import sys @@ -34,7 +37,6 @@ import email.parser import errno import tempfile import textwrap -import itertools import inspect import ntpath import posixpath @@ -54,8 +56,10 @@ except NameError: # capture these to bypass sandboxing from os import utime + try: from os import mkdir, rename, unlink + WRITE_SUPPORT = True except ImportError: # no write support, probably under GAE @@ -66,17 +70,26 @@ from os.path import isdir, split try: import importlib.machinery as importlib_machinery + # access attribute to force import under delayed import mechanisms. 
importlib_machinery.__name__ except ImportError: importlib_machinery = None -from pkg_resources.extern import appdirs +from pkg_resources.extern.jaraco.text import ( + yield_lines, + drop_comment, + join_continuation, +) + +from pkg_resources.extern import platformdirs from pkg_resources.extern import packaging + __import__('pkg_resources.extern.packaging.version') __import__('pkg_resources.extern.packaging.specifiers') __import__('pkg_resources.extern.packaging.requirements') __import__('pkg_resources.extern.packaging.markers') +__import__('pkg_resources.extern.packaging.utils') if sys.version_info < (3, 5): raise RuntimeError("Python 3.5 or later is required") @@ -102,6 +115,17 @@ _namespace_handlers = None _namespace_packages = None +warnings.warn( + "pkg_resources is deprecated as an API. " + "See https://setuptools.pypa.io/en/latest/pkg_resources.html", + DeprecationWarning, + stacklevel=2 +) + + +_PEP440_FALLBACK = re.compile(r"^v?(?P<safe>(?:[0-9]+!)?[0-9]+(?:\.[0-9]+)*)", re.I) + + class PEP440Warning(RuntimeWarning): """ Used when there is an issue with a version or specifier not complying with @@ -109,11 +133,7 @@ class PEP440Warning(RuntimeWarning): """ -def parse_version(v): - try: - return packaging.version.Version(v) - except packaging.version.InvalidVersion: - return packaging.version.LegacyVersion(v) +parse_version = packaging.version.Version _state_vars = {} @@ -185,51 +205,87 @@ def get_supported_platform(): __all__ = [ # Basic resource access and distribution/entry point discovery - 'require', 'run_script', 'get_provider', 'get_distribution', - 'load_entry_point', 'get_entry_map', 'get_entry_info', + 'require', + 'run_script', + 'get_provider', + 'get_distribution', + 'load_entry_point', + 'get_entry_map', + 'get_entry_info', 'iter_entry_points', - 'resource_string', 'resource_stream', 'resource_filename', - 'resource_listdir', 'resource_exists', 'resource_isdir', - + 'resource_string', + 'resource_stream', + 'resource_filename', + 
'resource_listdir', + 'resource_exists', + 'resource_isdir', # Environmental control - 'declare_namespace', 'working_set', 'add_activation_listener', - 'find_distributions', 'set_extraction_path', 'cleanup_resources', + 'declare_namespace', + 'working_set', + 'add_activation_listener', + 'find_distributions', + 'set_extraction_path', + 'cleanup_resources', 'get_default_cache', - # Primary implementation classes - 'Environment', 'WorkingSet', 'ResourceManager', - 'Distribution', 'Requirement', 'EntryPoint', - + 'Environment', + 'WorkingSet', + 'ResourceManager', + 'Distribution', + 'Requirement', + 'EntryPoint', # Exceptions - 'ResolutionError', 'VersionConflict', 'DistributionNotFound', - 'UnknownExtra', 'ExtractionError', - + 'ResolutionError', + 'VersionConflict', + 'DistributionNotFound', + 'UnknownExtra', + 'ExtractionError', # Warnings 'PEP440Warning', - # Parsing functions and string utilities - 'parse_requirements', 'parse_version', 'safe_name', 'safe_version', - 'get_platform', 'compatible_platforms', 'yield_lines', 'split_sections', - 'safe_extra', 'to_filename', 'invalid_marker', 'evaluate_marker', - + 'parse_requirements', + 'parse_version', + 'safe_name', + 'safe_version', + 'get_platform', + 'compatible_platforms', + 'yield_lines', + 'split_sections', + 'safe_extra', + 'to_filename', + 'invalid_marker', + 'evaluate_marker', # filesystem utilities - 'ensure_directory', 'normalize_path', - + 'ensure_directory', + 'normalize_path', # Distribution "precedence" constants - 'EGG_DIST', 'BINARY_DIST', 'SOURCE_DIST', 'CHECKOUT_DIST', 'DEVELOP_DIST', - + 'EGG_DIST', + 'BINARY_DIST', + 'SOURCE_DIST', + 'CHECKOUT_DIST', + 'DEVELOP_DIST', # "Provider" interfaces, implementations, and registration/lookup APIs - 'IMetadataProvider', 'IResourceProvider', 'FileMetadata', - 'PathMetadata', 'EggMetadata', 'EmptyProvider', 'empty_provider', - 'NullProvider', 'EggProvider', 'DefaultProvider', 'ZipProvider', - 'register_finder', 'register_namespace_handler', 
'register_loader_type', - 'fixup_namespace_packages', 'get_importer', - + 'IMetadataProvider', + 'IResourceProvider', + 'FileMetadata', + 'PathMetadata', + 'EggMetadata', + 'EmptyProvider', + 'empty_provider', + 'NullProvider', + 'EggProvider', + 'DefaultProvider', + 'ZipProvider', + 'register_finder', + 'register_namespace_handler', + 'register_loader_type', + 'fixup_namespace_packages', + 'get_importer', # Warnings 'PkgResourcesDeprecationWarning', - # Deprecated/backward compatibility only - 'run_main', 'AvailableDistributions', + 'run_main', + 'AvailableDistributions', ] @@ -288,8 +344,10 @@ class ContextualVersionConflict(VersionConflict): class DistributionNotFound(ResolutionError): """A requested distribution was not found""" - _template = ("The '{self.req}' distribution was not found " - "and is required by {self.requirers_str}") + _template = ( + "The '{self.req}' distribution was not found " + "and is required by {self.requirers_str}" + ) @property def req(self): @@ -383,7 +441,8 @@ def get_build_platform(): version = _macos_vers() machine = os.uname()[4].replace(" ", "_") return "macosx-%d.%d-%s" % ( - int(version[0]), int(version[1]), + int(version[0]), + int(version[1]), _macos_arch(machine), ) except ValueError: @@ -424,15 +483,18 @@ def compatible_platforms(provided, required): if provDarwin: dversion = int(provDarwin.group(1)) macosversion = "%s.%s" % (reqMac.group(1), reqMac.group(2)) - if dversion == 7 and macosversion >= "10.3" or \ - dversion == 8 and macosversion >= "10.4": + if ( + dversion == 7 + and macosversion >= "10.3" + or dversion == 8 + and macosversion >= "10.4" + ): return True # egg isn't macOS or legacy darwin return False # are they the same major version and machine type? - if provMac.group(1) != reqMac.group(1) or \ - provMac.group(3) != reqMac.group(3): + if provMac.group(1) != reqMac.group(1) or provMac.group(3) != reqMac.group(3): return False # is the required OS major update >= the provided one? 
@@ -494,8 +556,8 @@ class IMetadataProvider: def get_metadata_lines(name): """Yield named metadata resource as list of non-blank non-comment lines - Leading and trailing whitespace is stripped from each line, and lines - with ``#`` as the first non-blank character are omitted.""" + Leading and trailing whitespace is stripped from each line, and lines + with ``#`` as the first non-blank character are omitted.""" def metadata_isdir(name): """Is the named metadata a directory? (like ``os.path.isdir()``)""" @@ -543,6 +605,7 @@ class WorkingSet: self.entries = [] self.entry_keys = {} self.by_key = {} + self.normalized_to_canonical_keys = {} self.callbacks = [] if entries is None: @@ -623,6 +686,14 @@ class WorkingSet: is returned. """ dist = self.by_key.get(req.key) + + if dist is None: + canonical_key = self.normalized_to_canonical_keys.get(req.key) + + if canonical_key is not None: + req.key = canonical_key + dist = self.by_key.get(canonical_key) + if dist is not None and dist not in req: # XXX add more info raise VersionConflict(dist, req) @@ -691,14 +762,22 @@ class WorkingSet: return self.by_key[dist.key] = dist + normalized_name = packaging.utils.canonicalize_name(dist.key) + self.normalized_to_canonical_keys[normalized_name] = dist.key if dist.key not in keys: keys.append(dist.key) if dist.key not in keys2: keys2.append(dist.key) self._added_new(dist) - def resolve(self, requirements, env=None, installer=None, - replace_conflicting=False, extras=None): + def resolve( + self, + requirements, + env=None, + installer=None, + replace_conflicting=False, + extras=None, + ): """List all distributions needed to (recursively) meet `requirements` `requirements` must be a sequence of ``Requirement`` objects. 
`env`, @@ -747,33 +826,9 @@ class WorkingSet: if not req_extras.markers_pass(req, extras): continue - dist = best.get(req.key) - if dist is None: - # Find the best distribution and add it to the map - dist = self.by_key.get(req.key) - if dist is None or (dist not in req and replace_conflicting): - ws = self - if env is None: - if dist is None: - env = Environment(self.entries) - else: - # Use an empty environment and workingset to avoid - # any further conflicts with the conflicting - # distribution - env = Environment([]) - ws = WorkingSet([]) - dist = best[req.key] = env.best_match( - req, ws, installer, - replace_conflicting=replace_conflicting - ) - if dist is None: - requirers = required_by.get(req, None) - raise DistributionNotFound(req, requirers) - to_activate.append(dist) - if dist not in req: - # Oops, the "best" so far conflicts with a dependency - dependent_req = required_by[req] - raise VersionConflict(dist, req).with_context(dependent_req) + dist = self._resolve_dist( + req, best, replace_conflicting, env, installer, required_by, to_activate + ) # push the new requirements onto the stack new_requirements = dist.requires(req.extras)[::-1] @@ -789,8 +844,38 @@ class WorkingSet: # return list of distros to activate return to_activate - def find_plugins( - self, plugin_env, full_env=None, installer=None, fallback=True): + def _resolve_dist( + self, req, best, replace_conflicting, env, installer, required_by, to_activate + ): + dist = best.get(req.key) + if dist is None: + # Find the best distribution and add it to the map + dist = self.by_key.get(req.key) + if dist is None or (dist not in req and replace_conflicting): + ws = self + if env is None: + if dist is None: + env = Environment(self.entries) + else: + # Use an empty environment and workingset to avoid + # any further conflicts with the conflicting + # distribution + env = Environment([]) + ws = WorkingSet([]) + dist = best[req.key] = env.best_match( + req, ws, installer, 
replace_conflicting=replace_conflicting + ) + if dist is None: + requirers = required_by.get(req, None) + raise DistributionNotFound(req, requirers) + to_activate.append(dist) + if dist not in req: + # Oops, the "best" so far conflicts with a dependency + dependent_req = required_by[req] + raise VersionConflict(dist, req).with_context(dependent_req) + return dist + + def find_plugins(self, plugin_env, full_env=None, installer=None, fallback=True): """Find all activatable distributions in `plugin_env` Example usage:: @@ -843,9 +928,7 @@ class WorkingSet: list(map(shadow_set.add, self)) for project_name in plugin_projects: - for dist in plugin_env[project_name]: - req = [dist.as_requirement()] try: @@ -909,15 +992,19 @@ class WorkingSet: def __getstate__(self): return ( - self.entries[:], self.entry_keys.copy(), self.by_key.copy(), - self.callbacks[:] + self.entries[:], + self.entry_keys.copy(), + self.by_key.copy(), + self.normalized_to_canonical_keys.copy(), + self.callbacks[:], ) - def __setstate__(self, e_k_b_c): - entries, keys, by_key, callbacks = e_k_b_c + def __setstate__(self, e_k_b_n_c): + entries, keys, by_key, normalized_to_canonical_keys, callbacks = e_k_b_n_c self.entries = entries[:] self.entry_keys = keys.copy() self.by_key = by_key.copy() + self.normalized_to_canonical_keys = normalized_to_canonical_keys.copy() self.callbacks = callbacks[:] @@ -945,8 +1032,8 @@ class Environment: """Searchable snapshot of distributions on a search path""" def __init__( - self, search_path=None, platform=get_supported_platform(), - python=PY_MAJOR): + self, search_path=None, platform=get_supported_platform(), python=PY_MAJOR + ): """Snapshot distributions available on a search path Any distributions found on `search_path` are added to the environment. 
@@ -1013,16 +1100,14 @@ class Environment: return self._distmap.get(distribution_key, []) def add(self, dist): - """Add `dist` if we ``can_add()`` it and it has not already been added - """ + """Add `dist` if we ``can_add()`` it and it has not already been added""" if self.can_add(dist) and dist.has_version(): dists = self._distmap.setdefault(dist.key, []) if dist not in dists: dists.append(dist) dists.sort(key=operator.attrgetter('hashcmp'), reverse=True) - def best_match( - self, req, working_set, installer=None, replace_conflicting=False): + def best_match(self, req, working_set, installer=None, replace_conflicting=False): """Find distribution best matching `req` and usable on `working_set` This calls the ``find(req)`` method of the `working_set` to see if a @@ -1109,6 +1194,7 @@ class ExtractionError(RuntimeError): class ResourceManager: """Manage resource extraction and packages""" + extraction_path = None def __init__(self): @@ -1120,9 +1206,7 @@ class ResourceManager: def resource_isdir(self, package_or_requirement, resource_name): """Is the named resource an existing directory?""" - return get_provider(package_or_requirement).resource_isdir( - resource_name - ) + return get_provider(package_or_requirement).resource_isdir(resource_name) def resource_filename(self, package_or_requirement, resource_name): """Return a true filesystem path for specified resource""" @@ -1144,9 +1228,7 @@ class ResourceManager: def resource_listdir(self, package_or_requirement, resource_name): """List the contents of the named resource directory""" - return get_provider(package_or_requirement).resource_listdir( - resource_name - ) + return get_provider(package_or_requirement).resource_listdir(resource_name) def extraction_error(self): """Give an error message for problems extracting file(s)""" @@ -1154,7 +1236,8 @@ class ResourceManager: old_exc = sys.exc_info()[1] cache_path = self.extraction_path or get_default_cache() - tmpl = textwrap.dedent(""" + tmpl = textwrap.dedent( + """ 
Can't extract file(s) to egg cache The following error occurred while trying to extract file(s) @@ -1169,7 +1252,8 @@ class ResourceManager: Perhaps your account does not have write access to this directory? You can change the cache directory by setting the PYTHON_EGG_CACHE environment variable to point to an accessible directory. - """).lstrip() + """ + ).lstrip() err = ExtractionError(tmpl.format(**locals())) err.manager = self err.cache_path = cache_path @@ -1268,9 +1352,7 @@ class ResourceManager: ``cleanup_resources()``.) """ if self.cached_files: - raise ValueError( - "Can't change extraction path, files already extracted" - ) + raise ValueError("Can't change extraction path, files already extracted") self.extraction_path = path @@ -1294,9 +1376,8 @@ def get_default_cache(): or a platform-relevant user cache dir for an app named "Python-Eggs". """ - return ( - os.environ.get('PYTHON_EGG_CACHE') - or appdirs.user_cache_dir(appname='Python-Eggs') + return os.environ.get('PYTHON_EGG_CACHE') or platformdirs.user_cache_dir( + appname='Python-Eggs' ) @@ -1320,6 +1401,38 @@ def safe_version(version): return re.sub('[^A-Za-z0-9.]+', '-', version) +def _forgiving_version(version): + """Fallback when ``safe_version`` is not safe enough + >>> parse_version(_forgiving_version('0.23ubuntu1')) + <Version('0.23.dev0+sanitized.ubuntu1')> + >>> parse_version(_forgiving_version('0.23-')) + <Version('0.23.dev0+sanitized')> + >>> parse_version(_forgiving_version('0.-_')) + <Version('0.dev0+sanitized')> + >>> parse_version(_forgiving_version('42.+?1')) + <Version('42.dev0+sanitized.1')> + >>> parse_version(_forgiving_version('hello world')) + <Version('0.dev0+sanitized.hello.world')> + """ + version = version.replace(' ', '.') + match = _PEP440_FALLBACK.search(version) + if match: + safe = match["safe"] + rest = version[len(safe):] + else: + safe = "0" + rest = version + local = f"sanitized.{_safe_segment(rest)}".strip(".") + return f"{safe}.dev0+{local}" + + +def 
_safe_segment(segment): + """Convert an arbitrary string into a safe segment""" + segment = re.sub('[^A-Za-z0-9.]+', '-', segment) + segment = re.sub('-[^A-Za-z0-9]+', '-', segment) + return re.sub(r'\.[^A-Za-z0-9]+', '.', segment).strip(".-") + + def safe_extra(extra): """Convert an arbitrary string to a standard 'extra' name @@ -1433,8 +1546,9 @@ class NullProvider: script = 'scripts/' + script_name if not self.has_metadata(script): raise ResolutionError( - "Script {script!r} not found in metadata at {self.egg_info!r}" - .format(**locals()), + "Script {script!r} not found in metadata at {self.egg_info!r}".format( + **locals() + ), ) script_text = self.get_metadata(script).replace('\r\n', '\n') script_text = script_text.replace('\r', '\n') @@ -1447,8 +1561,12 @@ class NullProvider: exec(code, namespace, namespace) else: from linecache import cache + cache[script_filename] = ( - len(script_text), 0, script_text.split('\n'), script_filename + len(script_text), + 0, + script_text.split('\n'), + script_filename, ) script_code = compile(script_text, script_filename, 'exec') exec(script_code, namespace, namespace) @@ -1478,7 +1596,7 @@ class NullProvider: def _validate_resource_path(path): """ Validate the resource paths according to the docs. - https://setuptools.readthedocs.io/en/latest/pkg_resources.html#basic-resource-access + https://setuptools.pypa.io/en/latest/pkg_resources.html#basic-resource-access >>> warned = getfixture('recwarn') >>> warnings.simplefilter('always') @@ -1528,9 +1646,9 @@ is not allowed. AttributeError: ... """ invalid = ( - os.path.pardir in path.split(posixpath.sep) or - posixpath.isabs(path) or - ntpath.isabs(path) + os.path.pardir in path.split(posixpath.sep) + or posixpath.isabs(path) + or ntpath.isabs(path) ) if not invalid: return @@ -1543,10 +1661,9 @@ is not allowed. 
# for compatibility, warn; in future # raise ValueError(msg) - warnings.warn( + issue_warning( msg[:-1] + " and will raise exceptions in a future release.", DeprecationWarning, - stacklevel=4, ) def _get(self, path): @@ -1575,7 +1692,7 @@ class EggProvider(NullProvider): """Provider based on a virtual filesystem""" def __init__(self, module): - NullProvider.__init__(self, module) + super().__init__(module) self._setup_prefix() def _setup_prefix(self): @@ -1612,7 +1729,10 @@ class DefaultProvider(EggProvider): @classmethod def _register(cls): - loader_names = 'SourceFileLoader', 'SourcelessFileLoader', + loader_names = ( + 'SourceFileLoader', + 'SourcelessFileLoader', + ) for name in loader_names: loader_cls = getattr(importlib_machinery, name, type(None)) register_loader_type(loader_cls, cls) @@ -1672,6 +1792,7 @@ class MemoizedZipManifests(ZipManifests): """ Memoized zipfile manifests. """ + manifest_mod = collections.namedtuple('manifest_mod', 'manifest mtime') def load(self, path): @@ -1695,7 +1816,7 @@ class ZipProvider(EggProvider): _zip_manifests = MemoizedZipManifests() def __init__(self, module): - EggProvider.__init__(self, module) + super().__init__(module) self.zip_pre = self.loader.archive + os.sep def _zipinfo_name(self, fspath): @@ -1705,20 +1826,16 @@ class ZipProvider(EggProvider): if fspath == self.loader.archive: return '' if fspath.startswith(self.zip_pre): - return fspath[len(self.zip_pre):] - raise AssertionError( - "%s is not a subpath of %s" % (fspath, self.zip_pre) - ) + return fspath[len(self.zip_pre) :] + raise AssertionError("%s is not a subpath of %s" % (fspath, self.zip_pre)) def _parts(self, zip_path): # Convert a zipfile subpath into an egg-relative path part list. 
# pseudo-fs path fspath = self.zip_pre + zip_path if fspath.startswith(self.egg_root + os.sep): - return fspath[len(self.egg_root) + 1:].split(os.sep) - raise AssertionError( - "%s is not a subpath of %s" % (fspath, self.egg_root) - ) + return fspath[len(self.egg_root) + 1 :].split(os.sep) + raise AssertionError("%s is not a subpath of %s" % (fspath, self.egg_root)) @property def zipinfo(self): @@ -1746,26 +1863,22 @@ class ZipProvider(EggProvider): timestamp = time.mktime(date_time) return timestamp, size - def _extract_resource(self, manager, zip_path): - + # FIXME: 'ZipProvider._extract_resource' is too complex (12) + def _extract_resource(self, manager, zip_path): # noqa: C901 if zip_path in self._index(): for name in self._index()[zip_path]: - last = self._extract_resource( - manager, os.path.join(zip_path, name) - ) + last = self._extract_resource(manager, os.path.join(zip_path, name)) # return the extracted directory name return os.path.dirname(last) timestamp, size = self._get_date_and_size(self.zipinfo[zip_path]) if not WRITE_SUPPORT: - raise IOError('"os.rename" and "os.unlink" are not supported ' - 'on this platform') - try: - - real_path = manager.get_cache_path( - self.egg_name, self._parts(zip_path) + raise IOError( + '"os.rename" and "os.unlink" are not supported ' 'on this platform' ) + try: + real_path = manager.get_cache_path(self.egg_name, self._parts(zip_path)) if self._is_current(real_path, zip_path): return real_path @@ -1984,7 +2097,7 @@ def find_eggs_in_zip(importer, path_item, only=False): dists = find_eggs_in_zip(zipimport.zipimporter(subpath), subpath) for dist in dists: yield dist - elif subitem.lower().endswith('.dist-info'): + elif subitem.lower().endswith(('.dist-info', '.egg-info')): subpath = os.path.join(path_item, subitem) submeta = EggMetadata(zipimport.zipimporter(subpath)) submeta.egg_info = subpath @@ -2001,61 +2114,21 @@ def find_nothing(importer, path_item, only=False): register_finder(object, find_nothing) -def 
_by_version_descending(names): - """ - Given a list of filenames, return them in descending order - by version number. - - >>> names = 'bar', 'foo', 'Python-2.7.10.egg', 'Python-2.7.2.egg' - >>> _by_version_descending(names) - ['Python-2.7.10.egg', 'Python-2.7.2.egg', 'foo', 'bar'] - >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.egg' - >>> _by_version_descending(names) - ['Setuptools-1.2.3.egg', 'Setuptools-1.2.3b1.egg'] - >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.post1.egg' - >>> _by_version_descending(names) - ['Setuptools-1.2.3.post1.egg', 'Setuptools-1.2.3b1.egg'] - """ - def _by_version(name): - """ - Parse each component of the filename - """ - name, ext = os.path.splitext(name) - parts = itertools.chain(name.split('-'), [ext]) - return [packaging.version.parse(part) for part in parts] - - return sorted(names, key=_by_version, reverse=True) - - def find_on_path(importer, path_item, only=False): """Yield distributions accessible on a sys.path directory""" path_item = _normalize_cached(path_item) if _is_unpacked_egg(path_item): yield Distribution.from_filename( - path_item, metadata=PathMetadata( - path_item, os.path.join(path_item, 'EGG-INFO') - ) + path_item, + metadata=PathMetadata(path_item, os.path.join(path_item, 'EGG-INFO')), ) return - entries = ( - os.path.join(path_item, child) - for child in safe_listdir(path_item) - ) - - # for performance, before sorting by version, - # screen entries for only those that will yield - # distributions - filtered = ( - entry - for entry in entries - if dist_factory(path_item, entry, only) - ) + entries = (os.path.join(path_item, child) for child in safe_listdir(path_item)) # scan for .egg and .egg-info in directory - path_item_entries = _by_version_descending(filtered) - for entry in path_item_entries: + for entry in sorted(entries): fullpath = os.path.join(path_item, entry) factory = dist_factory(path_item, entry, only) for dist in factory(fullpath): @@ -2066,19 +2139,18 @@ def 
dist_factory(path_item, entry, only): """Return a dist_factory for the given entry.""" lower = entry.lower() is_egg_info = lower.endswith('.egg-info') - is_dist_info = ( - lower.endswith('.dist-info') and - os.path.isdir(os.path.join(path_item, entry)) + is_dist_info = lower.endswith('.dist-info') and os.path.isdir( + os.path.join(path_item, entry) ) is_meta = is_egg_info or is_dist_info return ( distributions_from_metadata - if is_meta else - find_distributions - if not only and _is_egg_path(entry) else - resolve_egg_link - if not only and lower.endswith('.egg-link') else - NoDists() + if is_meta + else find_distributions + if not only and _is_egg_path(entry) + else resolve_egg_link + if not only and lower.endswith('.egg-link') + else NoDists() ) @@ -2090,6 +2162,7 @@ class NoDists: >>> list(NoDists()('anything')) [] """ + def __bool__(self): return False @@ -2124,7 +2197,10 @@ def distributions_from_metadata(path): metadata = FileMetadata(path) entry = os.path.basename(path) yield Distribution.from_location( - root, entry, metadata, precedence=DEVELOP_DIST, + root, + entry, + metadata, + precedence=DEVELOP_DIST, ) @@ -2146,17 +2222,16 @@ def resolve_egg_link(path): """ referenced_paths = non_empty_lines(path) resolved_paths = ( - os.path.join(os.path.dirname(path), ref) - for ref in referenced_paths + os.path.join(os.path.dirname(path), ref) for ref in referenced_paths ) dist_groups = map(find_distributions, resolved_paths) return next(dist_groups, ()) -register_finder(pkgutil.ImpImporter, find_on_path) +if hasattr(pkgutil, 'ImpImporter'): + register_finder(pkgutil.ImpImporter, find_on_path) -if hasattr(importlib_machinery, 'FileFinder'): - register_finder(importlib_machinery.FileFinder, find_on_path) +register_finder(importlib_machinery.FileFinder, find_on_path) _declare_state('dict', _namespace_handlers={}) _declare_state('dict', _namespace_packages={}) @@ -2189,12 +2264,14 @@ def _handle_ns(packageName, path_item): # use find_spec (PEP 451) and fall-back to 
find_module (PEP 302) try: - loader = importer.find_spec(packageName).loader + spec = importer.find_spec(packageName) except AttributeError: # capture warnings due to #1111 with warnings.catch_warnings(): warnings.simplefilter("ignore") loader = importer.find_module(packageName) + else: + loader = spec.loader if spec else None if loader is None: return None @@ -2252,6 +2329,15 @@ def _rebuild_mod_path(orig_path, package_name, module): def declare_namespace(packageName): """Declare that package 'packageName' is a namespace package""" + msg = ( + f"Deprecated call to `pkg_resources.declare_namespace({packageName!r})`.\n" + "Implementing implicit namespace packages (as specified in PEP 420) " + "is preferred to `pkg_resources.declare_namespace`. " + "See https://setuptools.pypa.io/en/latest/references/" + "keywords.html#keyword-namespace-packages" + ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + _imp.acquire_lock() try: if packageName in _namespace_packages: @@ -2308,11 +2394,11 @@ def file_ns_handler(importer, path_item, packageName, module): return subpath -register_namespace_handler(pkgutil.ImpImporter, file_ns_handler) -register_namespace_handler(zipimport.zipimporter, file_ns_handler) +if hasattr(pkgutil, 'ImpImporter'): + register_namespace_handler(pkgutil.ImpImporter, file_ns_handler) -if hasattr(importlib_machinery, 'FileFinder'): - register_namespace_handler(importlib_machinery.FileFinder, file_ns_handler) +register_namespace_handler(zipimport.zipimporter, file_ns_handler) +register_namespace_handler(importlib_machinery.FileFinder, file_ns_handler) def null_ns_handler(importer, path_item, packageName, module): @@ -2324,8 +2410,7 @@ register_namespace_handler(object, null_ns_handler) def normalize_path(filename): """Normalize a file/dir name for comparison purposes""" - return os.path.normcase(os.path.realpath(os.path.normpath( - _cygwin_patch(filename)))) + return os.path.normcase(os.path.realpath(os.path.normpath(_cygwin_patch(filename)))) def 
_cygwin_patch(filename): # pragma: nocover @@ -2356,9 +2441,9 @@ def _is_egg_path(path): def _is_zip_egg(path): return ( - path.lower().endswith('.egg') and - os.path.isfile(path) and - zipfile.is_zipfile(path) + path.lower().endswith('.egg') + and os.path.isfile(path) + and zipfile.is_zipfile(path) ) @@ -2366,9 +2451,8 @@ def _is_unpacked_egg(path): """ Determine if given path appears to be an unpacked egg. """ - return ( - path.lower().endswith('.egg') and - os.path.isfile(os.path.join(path, 'EGG-INFO', 'PKG-INFO')) + return path.lower().endswith('.egg') and os.path.isfile( + os.path.join(path, 'EGG-INFO', 'PKG-INFO') ) @@ -2380,20 +2464,6 @@ def _set_parent_ns(packageName): setattr(sys.modules[parent], name, sys.modules[packageName]) -def yield_lines(strs): - """Yield non-empty/non-comment lines of a string or sequence""" - if isinstance(strs, str): - for s in strs.splitlines(): - s = s.strip() - # skip blank lines/comments - if s and not s.startswith('#'): - yield s - else: - for ss in strs: - for s in yield_lines(ss): - yield s - - MODULE = re.compile(r"\w+(\.\w+)*$").match EGG_NAME = re.compile( r""" @@ -2546,8 +2616,10 @@ def _version_from_file(lines): Given an iterable of lines from a Metadata file, return the value of the Version field, if present, or None otherwise. 
""" + def is_version_line(line): return line.lower().startswith('version:') + version_lines = filter(is_version_line, lines) line = next(iter(version_lines), '') _, _, value = line.partition(':') @@ -2556,12 +2628,19 @@ def _version_from_file(lines): class Distribution: """Wrap an actual or potential sys.path entry w/metadata""" + PKG_INFO = 'PKG-INFO' def __init__( - self, location=None, metadata=None, project_name=None, - version=None, py_version=PY_MAJOR, platform=None, - precedence=EGG_DIST): + self, + location=None, + metadata=None, + project_name=None, + version=None, + py_version=PY_MAJOR, + platform=None, + precedence=EGG_DIST, + ): self.project_name = safe_name(project_name or 'Unknown') if version is not None: self._version = safe_version(version) @@ -2584,8 +2663,13 @@ class Distribution: 'name', 'ver', 'pyver', 'plat' ) return cls( - location, metadata, project_name=project_name, version=version, - py_version=py_version, platform=platform, **kw + location, + metadata, + project_name=project_name, + version=version, + py_version=py_version, + platform=platform, + **kw, )._reload_version() def _reload_version(self): @@ -2594,7 +2678,7 @@ class Distribution: @property def hashcmp(self): return ( - self.parsed_version, + self._forgiving_parsed_version, self.precedence, self.key, self.location, @@ -2641,35 +2725,42 @@ class Distribution: @property def parsed_version(self): if not hasattr(self, "_parsed_version"): - self._parsed_version = parse_version(self.version) + try: + self._parsed_version = parse_version(self.version) + except packaging.version.InvalidVersion as ex: + info = f"(package: {self.project_name})" + if hasattr(ex, "add_note"): + ex.add_note(info) # PEP 678 + raise + raise packaging.version.InvalidVersion(f"{str(ex)} {info}") from None return self._parsed_version - def _warn_legacy_version(self): - LV = packaging.version.LegacyVersion - is_legacy = isinstance(self._parsed_version, LV) - if not is_legacy: - return + @property + def 
_forgiving_parsed_version(self): + try: + return self.parsed_version + except packaging.version.InvalidVersion as ex: + self._parsed_version = parse_version(_forgiving_version(self.version)) - # While an empty version is technically a legacy version and - # is not a valid PEP 440 version, it's also unlikely to - # actually come from someone and instead it is more likely that - # it comes from setuptools attempting to parse a filename and - # including it in the list. So for that we'll gate this warning - # on if the version is anything at all or not. - if not self.version: - return + notes = "\n".join(getattr(ex, "__notes__", [])) # PEP 678 + msg = f"""!!\n\n + ************************************************************************* + {str(ex)}\n{notes} + + This is a long overdue deprecation. + For the time being, `pkg_resources` will use `{self._parsed_version}` + as a replacement to avoid breaking existing environments, + but no future compatibility is guaranteed. - tmpl = textwrap.dedent(""" - '{project_name} ({version})' is being parsed as a legacy, - non PEP 440, - version. You may find odd behavior and sort order. - In particular it will be sorted as less than 0.0. It - is recommended to migrate to PEP 440 compatible - versions. - """).strip().replace('\n', ' ') + If you maintain package {self.project_name} you should implement + the relevant changes to adequate the project to PEP 440 immediately. + ************************************************************************* + \n\n!! 
+ """ + warnings.warn(msg, DeprecationWarning) - warnings.warn(tmpl.format(**vars(self)), PEP440Warning) + return self._parsed_version @property def version(self): @@ -2679,9 +2770,9 @@ class Distribution: version = self._get_version() if version is None: path = self._get_metadata_path_for_display(self.PKG_INFO) - msg = ( - "Missing 'Version:' header and/or {} file at path: {}" - ).format(self.PKG_INFO, path) + msg = ("Missing 'Version:' header and/or {} file at path: {}").format( + self.PKG_INFO, path + ) raise ValueError(msg, self) from e return version @@ -2710,8 +2801,7 @@ class Distribution: reqs = dm.pop(extra) new_extra, _, marker = extra.partition(':') fails_marker = marker and ( - invalid_marker(marker) - or not evaluate_marker(marker) + invalid_marker(marker) or not evaluate_marker(marker) ) if fails_marker: reqs = [] @@ -2783,8 +2873,9 @@ class Distribution: def egg_name(self): """Return what this distribution's standard .egg filename should be""" filename = "%s-%s-py%s" % ( - to_filename(self.project_name), to_filename(self.version), - self.py_version or PY_MAJOR + to_filename(self.project_name), + to_filename(self.version), + self.py_version or PY_MAJOR, ) if self.platform: @@ -2814,17 +2905,13 @@ class Distribution: def __dir__(self): return list( set(super(Distribution, self).__dir__()) - | set( - attr for attr in self._provider.__dir__() - if not attr.startswith('_') - ) + | set(attr for attr in self._provider.__dir__() if not attr.startswith('_')) ) @classmethod def from_filename(cls, filename, metadata=None, **kw): return cls.from_location( - _normalize_cached(filename), os.path.basename(filename), metadata, - **kw + _normalize_cached(filename), os.path.basename(filename), metadata, **kw ) def as_requirement(self): @@ -2859,7 +2946,8 @@ class Distribution: """Return the EntryPoint object for `group`+`name`, or ``None``""" return self.get_entry_map(group).get(name) - def insert_on(self, path, loc=None, replace=False): + # FIXME: 
'Distribution.insert_on' is too complex (13) + def insert_on(self, path, loc=None, replace=False): # noqa: C901 """Ensure self.location is on path If replace=False (default): @@ -2935,14 +3023,18 @@ class Distribution: nsp = dict.fromkeys(self._get_metadata('namespace_packages.txt')) loc = normalize_path(self.location) for modname in self._get_metadata('top_level.txt'): - if (modname not in sys.modules or modname in nsp - or modname in _namespace_packages): + if ( + modname not in sys.modules + or modname in nsp + or modname in _namespace_packages + ): continue if modname in ('pkg_resources', 'setuptools', 'site'): continue fn = getattr(sys.modules[modname], '__file__', None) - if fn and (normalize_path(fn).startswith(loc) or - fn.startswith(self.location)): + if fn and ( + normalize_path(fn).startswith(loc) or fn.startswith(self.location) + ): continue issue_warning( "Module %s was already imported from %s, but %s is being added" @@ -2955,6 +3047,9 @@ class Distribution: except ValueError: issue_warning("Unbuilt egg for " + repr(self)) return False + except SystemError: + # TODO: remove this except clause when python/cpython#103632 is fixed. + return False return True def clone(self, **kw): @@ -2994,6 +3089,7 @@ class DistInfoDistribution(Distribution): Wrap an actual or potential sys.path entry w/metadata, .dist-info style. 
""" + PKG_INFO = 'METADATA' EQEQ = re.compile(r"([\(,])\s*(\d.*?)\s*([,\)])") @@ -3029,12 +3125,12 @@ class DistInfoDistribution(Distribution): if not req.marker or req.marker.evaluate({'extra': extra}): yield req - common = frozenset(reqs_for_extra(None)) + common = types.MappingProxyType(dict.fromkeys(reqs_for_extra(None))) dm[None].extend(common) for extra in self._parsed_pkg_info.get_all('Provides-Extra') or []: s_extra = safe_extra(extra.strip()) - dm[s_extra] = list(frozenset(reqs_for_extra(extra)) - common) + dm[s_extra] = [r for r in reqs_for_extra(extra) if r not in common] return dm @@ -3060,25 +3156,12 @@ def issue_warning(*args, **kw): def parse_requirements(strs): - """Yield ``Requirement`` objects for each specification in `strs` + """ + Yield ``Requirement`` objects for each specification in `strs`. `strs` must be a string, or a (possibly-nested) iterable thereof. """ - # create a steppable iterator, so we can handle \-continuations - lines = iter(yield_lines(strs)) - - for line in lines: - # Drop comments -- a hash without a space may be in a URL. - if ' #' in line: - line = line[:line.find(' #')] - # If there is a line continuation, drop it, and append the next line. 
- if line.endswith('\\'): - line = line[:-2].strip() - try: - line += next(lines) - except StopIteration: - return - yield Requirement(line) + return map(Requirement, join_continuation(map(drop_comment, yield_lines(strs)))) class RequirementParseError(packaging.requirements.InvalidRequirement): @@ -3092,8 +3175,7 @@ class Requirement(packaging.requirements.Requirement): self.unsafe_name = self.name project_name = safe_name(self.name) self.project_name, self.key = project_name, project_name.lower() - self.specs = [ - (spec.operator, spec.version) for spec in self.specifier] + self.specs = [(spec.operator, spec.version) for spec in self.specifier] self.extras = tuple(map(safe_extra, self.extras)) self.hashCmp = ( self.key, @@ -3105,10 +3187,7 @@ class Requirement(packaging.requirements.Requirement): self.__hash = hash(self.hashCmp) def __eq__(self, other): - return ( - isinstance(other, Requirement) and - self.hashCmp == other.hashCmp - ) + return isinstance(other, Requirement) and self.hashCmp == other.hashCmp def __ne__(self, other): return not self == other @@ -3133,7 +3212,7 @@ class Requirement(packaging.requirements.Requirement): @staticmethod def parse(s): - req, = parse_requirements(s) + (req,) = parse_requirements(s) return req @@ -3236,6 +3315,15 @@ def _initialize(g=globals()): ) +class PkgResourcesDeprecationWarning(Warning): + """ + Base class for warning about deprecations in ``pkg_resources`` + + This class is not derived from ``DeprecationWarning``, and as such is + visible by default. + """ + + @_call_aside def _initialize_master_working_set(): """ @@ -3262,10 +3350,7 @@ def _initialize_master_working_set(): # ensure that all distributions added to the working set in the future # (e.g. by calling ``require()``) will get activated as well, # with higher priority (replace=True). 
- tuple( - dist.activate(replace=False) - for dist in working_set - ) + tuple(dist.activate(replace=False) for dist in working_set) add_activation_listener( lambda dist: dist.activate(replace=True), existing=False, @@ -3274,12 +3359,3 @@ def _initialize_master_working_set(): # match order list(map(working_set.add_entry, sys.path)) globals().update(locals()) - - -class PkgResourcesDeprecationWarning(Warning): - """ - Base class for warning about deprecations in ``pkg_resources`` - - This class is not derived from ``DeprecationWarning``, and as such is - visible by default. - """ diff --git a/third_party/python/setuptools/pkg_resources/_vendor/appdirs.py b/third_party/python/setuptools/pkg_resources/_vendor/appdirs.py deleted file mode 100644 index ae67001af8..0000000000 --- a/third_party/python/setuptools/pkg_resources/_vendor/appdirs.py +++ /dev/null @@ -1,608 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2005-2010 ActiveState Software Inc. -# Copyright (c) 2013 Eddy Petrișor - -"""Utilities for determining application-specific dirs. - -See <http://github.com/ActiveState/appdirs> for details and usage. -""" -# Dev Notes: -# - MSDN on where to store app data files: -# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120 -# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html -# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html - -__version_info__ = (1, 4, 3) -__version__ = '.'.join(map(str, __version_info__)) - - -import sys -import os - -PY3 = sys.version_info[0] == 3 - -if PY3: - unicode = str - -if sys.platform.startswith('java'): - import platform - os_name = platform.java_ver()[3][0] - if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc. - system = 'win32' - elif os_name.startswith('Mac'): # "Mac OS X", etc. - system = 'darwin' - else: # "Linux", "SunOS", "FreeBSD", etc. 
- # Setting this to "linux2" is not ideal, but only Windows or Mac - # are actually checked for and the rest of the module expects - # *sys.platform* style strings. - system = 'linux2' -else: - system = sys.platform - - - -def user_data_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be "<major>.<minor>". - Only applied when appname is present. - "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. See - <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx> - for a discussion of issues. - - Typical user data directories are: - Mac OS X: ~/Library/Application Support/<AppName> - Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined - Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName> - Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName> - Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName> - Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName> - - For Unix, we follow the XDG spec and support $XDG_DATA_HOME. - That means, by default "~/.local/share/<AppName>". 
- """ - if system == "win32": - if appauthor is None: - appauthor = appname - const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA" - path = os.path.normpath(_get_win_folder(const)) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - elif system == 'darwin': - path = os.path.expanduser('~/Library/Application Support/') - if appname: - path = os.path.join(path, appname) - else: - path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def site_data_dir(appname=None, appauthor=None, version=None, multipath=False): - r"""Return full path to the user-shared data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be "<major>.<minor>". - Only applied when appname is present. - "multipath" is an optional parameter only applicable to *nix - which indicates that the entire list of data dirs should be - returned. By default, the first item from XDG_DATA_DIRS is - returned, or '/usr/local/share/<AppName>', - if XDG_DATA_DIRS is not set - - Typical site data directories are: - Mac OS X: /Library/Application Support/<AppName> - Unix: /usr/local/share/<AppName> or /usr/share/<AppName> - Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName> - Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) 
- Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7. - - For Unix, this is using the $XDG_DATA_DIRS[0] default. - - WARNING: Do not use this on Windows. See the Vista-Fail note above for why. - """ - if system == "win32": - if appauthor is None: - appauthor = appname - path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA")) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - elif system == 'darwin': - path = os.path.expanduser('/Library/Application Support') - if appname: - path = os.path.join(path, appname) - else: - # XDG default for $XDG_DATA_DIRS - # only first, if multipath is False - path = os.getenv('XDG_DATA_DIRS', - os.pathsep.join(['/usr/local/share', '/usr/share'])) - pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] - if appname: - if version: - appname = os.path.join(appname, version) - pathlist = [os.sep.join([x, appname]) for x in pathlist] - - if multipath: - path = os.pathsep.join(pathlist) - else: - path = pathlist[0] - return path - - if appname and version: - path = os.path.join(path, version) - return path - - -def user_config_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific config dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be "<major>.<minor>". - Only applied when appname is present. 
- "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. See - <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx> - for a discussion of issues. - - Typical user config directories are: - Mac OS X: same as user_data_dir - Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined - Win *: same as user_data_dir - - For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME. - That means, by default "~/.config/<AppName>". - """ - if system in ["win32", "darwin"]: - path = user_data_dir(appname, appauthor, None, roaming) - else: - path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def site_config_dir(appname=None, appauthor=None, version=None, multipath=False): - r"""Return full path to the user-shared data dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be "<major>.<minor>". - Only applied when appname is present. - "multipath" is an optional parameter only applicable to *nix - which indicates that the entire list of config dirs should be - returned. 
By default, the first item from XDG_CONFIG_DIRS is - returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set - - Typical site config directories are: - Mac OS X: same as site_data_dir - Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in - $XDG_CONFIG_DIRS - Win *: same as site_data_dir - Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.) - - For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False - - WARNING: Do not use this on Windows. See the Vista-Fail note above for why. - """ - if system in ["win32", "darwin"]: - path = site_data_dir(appname, appauthor) - if appname and version: - path = os.path.join(path, version) - else: - # XDG default for $XDG_CONFIG_DIRS - # only first, if multipath is False - path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg') - pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)] - if appname: - if version: - appname = os.path.join(appname, version) - pathlist = [os.sep.join([x, appname]) for x in pathlist] - - if multipath: - path = os.pathsep.join(pathlist) - else: - path = pathlist[0] - return path - - -def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True): - r"""Return full path to the user-specific cache dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be "<major>.<minor>". - Only applied when appname is present. - "opinion" (boolean) can be False to disable the appending of - "Cache" to the base app data dir for Windows. 
See - discussion below. - - Typical user cache directories are: - Mac OS X: ~/Library/Caches/<AppName> - Unix: ~/.cache/<AppName> (XDG default) - Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache - Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache - - On Windows the only suggestion in the MSDN docs is that local settings go in - the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming - app data dir (the default returned by `user_data_dir` above). Apps typically - put cache data somewhere *under* the given dir here. Some examples: - ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache - ...\Acme\SuperApp\Cache\1.0 - OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value. - This can be disabled with the `opinion=False` option. - """ - if system == "win32": - if appauthor is None: - appauthor = appname - path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA")) - if appname: - if appauthor is not False: - path = os.path.join(path, appauthor, appname) - else: - path = os.path.join(path, appname) - if opinion: - path = os.path.join(path, "Cache") - elif system == 'darwin': - path = os.path.expanduser('~/Library/Caches') - if appname: - path = os.path.join(path, appname) - else: - path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def user_state_dir(appname=None, appauthor=None, version=None, roaming=False): - r"""Return full path to the user-specific state dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. 
- "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be "<major>.<minor>". - Only applied when appname is present. - "roaming" (boolean, default False) can be set True to use the Windows - roaming appdata directory. That means that for users on a Windows - network setup for roaming profiles, this user data will be - sync'd on login. See - <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx> - for a discussion of issues. - - Typical user state directories are: - Mac OS X: same as user_data_dir - Unix: ~/.local/state/<AppName> # or in $XDG_STATE_HOME, if defined - Win *: same as user_data_dir - - For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state> - to extend the XDG spec and support $XDG_STATE_HOME. - - That means, by default "~/.local/state/<AppName>". - """ - if system in ["win32", "darwin"]: - path = user_data_dir(appname, appauthor, None, roaming) - else: - path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state")) - if appname: - path = os.path.join(path, appname) - if appname and version: - path = os.path.join(path, version) - return path - - -def user_log_dir(appname=None, appauthor=None, version=None, opinion=True): - r"""Return full path to the user-specific log dir for this application. - - "appname" is the name of application. - If None, just the system directory is returned. - "appauthor" (only used on Windows) is the name of the - appauthor or distributing body for this application. Typically - it is the owning company name. This falls back to appname. You may - pass False to disable it. - "version" is an optional version path element to append to the - path. You might want to use this if you want multiple versions - of your app to be able to run independently. If used, this - would typically be "<major>.<minor>". 
- Only applied when appname is present. - "opinion" (boolean) can be False to disable the appending of - "Logs" to the base app data dir for Windows, and "log" to the - base cache dir for Unix. See discussion below. - - Typical user log directories are: - Mac OS X: ~/Library/Logs/<AppName> - Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined - Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs - Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs - - On Windows the only suggestion in the MSDN docs is that local settings - go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in - examples of what some windows apps use for a logs dir.) - - OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA` - value for Windows and appends "log" to the user cache dir for Unix. - This can be disabled with the `opinion=False` option. - """ - if system == "darwin": - path = os.path.join( - os.path.expanduser('~/Library/Logs'), - appname) - elif system == "win32": - path = user_data_dir(appname, appauthor, version) - version = False - if opinion: - path = os.path.join(path, "Logs") - else: - path = user_cache_dir(appname, appauthor, version) - version = False - if opinion: - path = os.path.join(path, "log") - if appname and version: - path = os.path.join(path, version) - return path - - -class AppDirs(object): - """Convenience wrapper for getting application dirs.""" - def __init__(self, appname=None, appauthor=None, version=None, - roaming=False, multipath=False): - self.appname = appname - self.appauthor = appauthor - self.version = version - self.roaming = roaming - self.multipath = multipath - - @property - def user_data_dir(self): - return user_data_dir(self.appname, self.appauthor, - version=self.version, roaming=self.roaming) - - @property - def site_data_dir(self): - return site_data_dir(self.appname, self.appauthor, - version=self.version, multipath=self.multipath) - - 
@property - def user_config_dir(self): - return user_config_dir(self.appname, self.appauthor, - version=self.version, roaming=self.roaming) - - @property - def site_config_dir(self): - return site_config_dir(self.appname, self.appauthor, - version=self.version, multipath=self.multipath) - - @property - def user_cache_dir(self): - return user_cache_dir(self.appname, self.appauthor, - version=self.version) - - @property - def user_state_dir(self): - return user_state_dir(self.appname, self.appauthor, - version=self.version) - - @property - def user_log_dir(self): - return user_log_dir(self.appname, self.appauthor, - version=self.version) - - -#---- internal support stuff - -def _get_win_folder_from_registry(csidl_name): - """This is a fallback technique at best. I'm not sure if using the - registry for this guarantees us the correct answer for all CSIDL_* - names. - """ - if PY3: - import winreg as _winreg - else: - import _winreg - - shell_folder_name = { - "CSIDL_APPDATA": "AppData", - "CSIDL_COMMON_APPDATA": "Common AppData", - "CSIDL_LOCAL_APPDATA": "Local AppData", - }[csidl_name] - - key = _winreg.OpenKey( - _winreg.HKEY_CURRENT_USER, - r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" - ) - dir, type = _winreg.QueryValueEx(key, shell_folder_name) - return dir - - -def _get_win_folder_with_pywin32(csidl_name): - from win32com.shell import shellcon, shell - dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0) - # Try to make this a unicode path because SHGetFolderPath does - # not return unicode strings when there is unicode data in the - # path. - try: - dir = unicode(dir) - - # Downgrade to short path name if have highbit chars. See - # <http://bugs.activestate.com/show_bug.cgi?id=85099>. 
- has_high_char = False - for c in dir: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - try: - import win32api - dir = win32api.GetShortPathName(dir) - except ImportError: - pass - except UnicodeError: - pass - return dir - - -def _get_win_folder_with_ctypes(csidl_name): - import ctypes - - csidl_const = { - "CSIDL_APPDATA": 26, - "CSIDL_COMMON_APPDATA": 35, - "CSIDL_LOCAL_APPDATA": 28, - }[csidl_name] - - buf = ctypes.create_unicode_buffer(1024) - ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) - - # Downgrade to short path name if have highbit chars. See - # <http://bugs.activestate.com/show_bug.cgi?id=85099>. - has_high_char = False - for c in buf: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - buf2 = ctypes.create_unicode_buffer(1024) - if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): - buf = buf2 - - return buf.value - -def _get_win_folder_with_jna(csidl_name): - import array - from com.sun import jna - from com.sun.jna.platform import win32 - - buf_size = win32.WinDef.MAX_PATH * 2 - buf = array.zeros('c', buf_size) - shell = win32.Shell32.INSTANCE - shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf) - dir = jna.Native.toString(buf.tostring()).rstrip("\0") - - # Downgrade to short path name if have highbit chars. See - # <http://bugs.activestate.com/show_bug.cgi?id=85099>. 
- has_high_char = False - for c in dir: - if ord(c) > 255: - has_high_char = True - break - if has_high_char: - buf = array.zeros('c', buf_size) - kernel = win32.Kernel32.INSTANCE - if kernel.GetShortPathName(dir, buf, buf_size): - dir = jna.Native.toString(buf.tostring()).rstrip("\0") - - return dir - -if system == "win32": - try: - import win32com.shell - _get_win_folder = _get_win_folder_with_pywin32 - except ImportError: - try: - from ctypes import windll - _get_win_folder = _get_win_folder_with_ctypes - except ImportError: - try: - import com.sun.jna - _get_win_folder = _get_win_folder_with_jna - except ImportError: - _get_win_folder = _get_win_folder_from_registry - - -#---- self test code - -if __name__ == "__main__": - appname = "MyApp" - appauthor = "MyCompany" - - props = ("user_data_dir", - "user_config_dir", - "user_cache_dir", - "user_state_dir", - "user_log_dir", - "site_data_dir", - "site_config_dir") - - print("-- app dirs %s --" % __version__) - - print("-- app dirs (with optional 'version')") - dirs = AppDirs(appname, appauthor, version="1.0") - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (without optional 'version')") - dirs = AppDirs(appname, appauthor) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (without optional 'appauthor')") - dirs = AppDirs(appname) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) - - print("\n-- app dirs (with disabled 'appauthor')") - dirs = AppDirs(appname, appauthor=False) - for prop in props: - print("%s: %s" % (prop, getattr(dirs, prop))) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/__init__.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/__init__.py new file mode 100644 index 0000000000..34e3a9950c --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/__init__.py @@ -0,0 +1,36 @@ +"""Read 
resources contained within a package.""" + +from ._common import ( + as_file, + files, + Package, +) + +from ._legacy import ( + contents, + open_binary, + read_binary, + open_text, + read_text, + is_resource, + path, + Resource, +) + +from .abc import ResourceReader + + +__all__ = [ + 'Package', + 'Resource', + 'ResourceReader', + 'as_file', + 'contents', + 'files', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', +] diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_adapters.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_adapters.py new file mode 100644 index 0000000000..ea363d86a5 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_adapters.py @@ -0,0 +1,170 @@ +from contextlib import suppress +from io import TextIOWrapper + +from . import abc + + +class SpecLoaderAdapter: + """ + Adapt a package spec to adapt the underlying loader. + """ + + def __init__(self, spec, adapter=lambda spec: spec.loader): + self.spec = spec + self.loader = adapter(spec) + + def __getattr__(self, name): + return getattr(self.spec, name) + + +class TraversableResourcesLoader: + """ + Adapt a loader to provide TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + def get_resource_reader(self, name): + return CompatibilityFiles(self.spec)._native() + + +def _io_wrapper(file, mode='r', *args, **kwargs): + if mode == 'r': + return TextIOWrapper(file, *args, **kwargs) + elif mode == 'rb': + return file + raise ValueError( + "Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode) + ) + + +class CompatibilityFiles: + """ + Adapter for an existing or non-existent resource reader + to provide a compatibility .files(). + """ + + class SpecPath(abc.Traversable): + """ + Path tied to a module spec. + Can be read and exposes the resource reader children. 
+ """ + + def __init__(self, spec, reader): + self._spec = spec + self._reader = reader + + def iterdir(self): + if not self._reader: + return iter(()) + return iter( + CompatibilityFiles.ChildPath(self._reader, path) + for path in self._reader.contents() + ) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + if not self._reader: + return CompatibilityFiles.OrphanPath(other) + return CompatibilityFiles.ChildPath(self._reader, other) + + @property + def name(self): + return self._spec.name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs) + + class ChildPath(abc.Traversable): + """ + Path tied to a resource reader child. + Can be read but doesn't expose any meaningful children. + """ + + def __init__(self, reader, name): + self._reader = reader + self._name = name + + def iterdir(self): + return iter(()) + + def is_file(self): + return self._reader.is_resource(self.name) + + def is_dir(self): + return not self.is_file() + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(self.name, other) + + @property + def name(self): + return self._name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper( + self._reader.open_resource(self.name), mode, *args, **kwargs + ) + + class OrphanPath(abc.Traversable): + """ + Orphan path, not tied to a module spec or resource reader. + Can't be read and doesn't expose any meaningful children. 
+ """ + + def __init__(self, *path_parts): + if len(path_parts) < 1: + raise ValueError('Need at least one path part to construct a path') + self._path = path_parts + + def iterdir(self): + return iter(()) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(*self._path, other) + + @property + def name(self): + return self._path[-1] + + def open(self, mode='r', *args, **kwargs): + raise FileNotFoundError("Can't open orphan path") + + def __init__(self, spec): + self.spec = spec + + @property + def _reader(self): + with suppress(AttributeError): + return self.spec.loader.get_resource_reader(self.spec.name) + + def _native(self): + """ + Return the native reader if it supports files(). + """ + reader = self._reader + return reader if hasattr(reader, 'files') else self + + def __getattr__(self, attr): + return getattr(self._reader, attr) + + def files(self): + return CompatibilityFiles.SpecPath(self.spec, self._reader) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. 
+ """ + return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_common.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_common.py new file mode 100644 index 0000000000..3c6de1cfb2 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_common.py @@ -0,0 +1,207 @@ +import os +import pathlib +import tempfile +import functools +import contextlib +import types +import importlib +import inspect +import warnings +import itertools + +from typing import Union, Optional, cast +from .abc import ResourceReader, Traversable + +from ._compat import wrap_spec + +Package = Union[types.ModuleType, str] +Anchor = Package + + +def package_to_anchor(func): + """ + Replace 'package' parameter as 'anchor' and warn about the change. + + Other errors should fall through. + + >>> files('a', 'b') + Traceback (most recent call last): + TypeError: files() takes from 0 to 1 positional arguments but 2 were given + """ + undefined = object() + + @functools.wraps(func) + def wrapper(anchor=undefined, package=undefined): + if package is not undefined: + if anchor is not undefined: + return func(anchor, package) + warnings.warn( + "First parameter to files is renamed to 'anchor'", + DeprecationWarning, + stacklevel=2, + ) + return func(package) + elif anchor is undefined: + return func() + return func(anchor) + + return wrapper + + +@package_to_anchor +def files(anchor: Optional[Anchor] = None) -> Traversable: + """ + Get a Traversable resource for an anchor. + """ + return from_package(resolve(anchor)) + + +def get_resource_reader(package: types.ModuleType) -> Optional[ResourceReader]: + """ + Return the package's loader if it's a ResourceReader. 
+ """ + # We can't use + # a issubclass() check here because apparently abc.'s __subclasscheck__() + # hook wants to create a weak reference to the object, but + # zipimport.zipimporter does not support weak references, resulting in a + # TypeError. That seems terrible. + spec = package.__spec__ + reader = getattr(spec.loader, 'get_resource_reader', None) # type: ignore + if reader is None: + return None + return reader(spec.name) # type: ignore + + +@functools.singledispatch +def resolve(cand: Optional[Anchor]) -> types.ModuleType: + return cast(types.ModuleType, cand) + + +@resolve.register +def _(cand: str) -> types.ModuleType: + return importlib.import_module(cand) + + +@resolve.register +def _(cand: None) -> types.ModuleType: + return resolve(_infer_caller().f_globals['__name__']) + + +def _infer_caller(): + """ + Walk the stack and find the frame of the first caller not in this module. + """ + + def is_this_file(frame_info): + return frame_info.filename == __file__ + + def is_wrapper(frame_info): + return frame_info.function == 'wrapper' + + not_this_file = itertools.filterfalse(is_this_file, inspect.stack()) + # also exclude 'wrapper' due to singledispatch in the call stack + callers = itertools.filterfalse(is_wrapper, not_this_file) + return next(callers).frame + + +def from_package(package: types.ModuleType): + """ + Return a Traversable object for the given package. + + """ + spec = wrap_spec(package) + reader = spec.loader.get_resource_reader(spec.name) + return reader.files() + + +@contextlib.contextmanager +def _tempfile( + reader, + suffix='', + # gh-93353: Keep a reference to call os.remove() in late Python + # finalization. + *, + _os_remove=os.remove, +): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. 
+ fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + try: + os.write(fd, reader()) + finally: + os.close(fd) + del reader + yield pathlib.Path(raw_path) + finally: + try: + _os_remove(raw_path) + except FileNotFoundError: + pass + + +def _temp_file(path): + return _tempfile(path.read_bytes, suffix=path.name) + + +def _is_present_dir(path: Traversable) -> bool: + """ + Some Traversables implement ``is_dir()`` to raise an + exception (i.e. ``FileNotFoundError``) when the + directory doesn't exist. This function wraps that call + to always return a boolean and only return True + if there's a dir and it exists. + """ + with contextlib.suppress(FileNotFoundError): + return path.is_dir() + return False + + +@functools.singledispatch +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + return _temp_dir(path) if _is_present_dir(path) else _temp_file(path) + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path + + +@contextlib.contextmanager +def _temp_path(dir: tempfile.TemporaryDirectory): + """ + Wrap tempfile.TemporyDirectory to return a pathlib object. + """ + with dir as result: + yield pathlib.Path(result) + + +@contextlib.contextmanager +def _temp_dir(path): + """ + Given a traversable dir, recursively replicate the whole tree + to the file system in a context manager. 
+ """ + assert path.is_dir() + with _temp_path(tempfile.TemporaryDirectory()) as temp_dir: + yield _write_contents(temp_dir, path) + + +def _write_contents(target, source): + child = target.joinpath(source.name) + if source.is_dir(): + child.mkdir() + for item in source.iterdir(): + _write_contents(child, item) + else: + child.write_bytes(source.read_bytes()) + return child diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_compat.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_compat.py new file mode 100644 index 0000000000..8b5b1d280f --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_compat.py @@ -0,0 +1,108 @@ +# flake8: noqa + +import abc +import os +import sys +import pathlib +from contextlib import suppress +from typing import Union + + +if sys.version_info >= (3, 10): + from zipfile import Path as ZipPath # type: ignore +else: + from ..zipp import Path as ZipPath # type: ignore + + +try: + from typing import runtime_checkable # type: ignore +except ImportError: + + def runtime_checkable(cls): # type: ignore + return cls + + +try: + from typing import Protocol # type: ignore +except ImportError: + Protocol = abc.ABC # type: ignore + + +class TraversableResourcesLoader: + """ + Adapt loaders to provide TraversableResources and other + compatibility. + + Used primarily for Python 3.9 and earlier where the native + loaders do not yet implement TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + @property + def path(self): + return self.spec.origin + + def get_resource_reader(self, name): + from . 
import readers, _adapters + + def _zip_reader(spec): + with suppress(AttributeError): + return readers.ZipReader(spec.loader, spec.name) + + def _namespace_reader(spec): + with suppress(AttributeError, ValueError): + return readers.NamespaceReader(spec.submodule_search_locations) + + def _available_reader(spec): + with suppress(AttributeError): + return spec.loader.get_resource_reader(spec.name) + + def _native_reader(spec): + reader = _available_reader(spec) + return reader if hasattr(reader, 'files') else None + + def _file_reader(spec): + try: + path = pathlib.Path(self.path) + except TypeError: + return None + if path.exists(): + return readers.FileReader(self) + + return ( + # native reader if it supplies 'files' + _native_reader(self.spec) + or + # local ZipReader if a zip module + _zip_reader(self.spec) + or + # local NamespaceReader if a namespace module + _namespace_reader(self.spec) + or + # local FileReader + _file_reader(self.spec) + # fallback - adapt the spec ResourceReader to TraversableReader + or _adapters.CompatibilityFiles(self.spec) + ) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + + Supersedes _adapters.wrap_spec to use TraversableResourcesLoader + from above for older Python compatibility (<3.10). + """ + from . 
import _adapters + + return _adapters.SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) + + +if sys.version_info >= (3, 9): + StrPath = Union[str, os.PathLike[str]] +else: + # PathLike is only subscriptable at runtime in 3.9+ + StrPath = Union[str, "os.PathLike[str]"] diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_itertools.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_itertools.py new file mode 100644 index 0000000000..cce05582ff --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_itertools.py @@ -0,0 +1,35 @@ +from itertools import filterfalse + +from typing import ( + Callable, + Iterable, + Iterator, + Optional, + Set, + TypeVar, + Union, +) + +# Type and type variable definitions +_T = TypeVar('_T') +_U = TypeVar('_U') + + +def unique_everseen( + iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None +) -> Iterator[_T]: + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen: Set[Union[_T, _U]] = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_legacy.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_legacy.py new file mode 100644 index 0000000000..b1ea8105da --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/_legacy.py @@ -0,0 +1,120 @@ +import functools +import os +import pathlib +import types +import warnings + +from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any + +from . 
import _common + +Package = Union[types.ModuleType, str] +Resource = str + + +def deprecated(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + warnings.warn( + f"{func.__name__} is deprecated. Use files() instead. " + "Refer to https://importlib-resources.readthedocs.io" + "/en/latest/using.html#migrating-from-legacy for migration advice.", + DeprecationWarning, + stacklevel=2, + ) + return func(*args, **kwargs) + + return wrapper + + +def normalize_path(path: Any) -> str: + """Normalize a path by ensuring it is a string. + + If the resulting string contains path separators, an exception is raised. + """ + str_path = str(path) + parent, file_name = os.path.split(str_path) + if parent: + raise ValueError(f'{path!r} must be only a file name') + return file_name + + +@deprecated +def open_binary(package: Package, resource: Resource) -> BinaryIO: + """Return a file-like object opened for binary reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open('rb') + + +@deprecated +def read_binary(package: Package, resource: Resource) -> bytes: + """Return the binary contents of the resource.""" + return (_common.files(package) / normalize_path(resource)).read_bytes() + + +@deprecated +def open_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> TextIO: + """Return a file-like object opened for text reading of the resource.""" + return (_common.files(package) / normalize_path(resource)).open( + 'r', encoding=encoding, errors=errors + ) + + +@deprecated +def read_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict', +) -> str: + """Return the decoded string of the resource. + + The decoding-related arguments have the same semantics as those of + bytes.decode(). 
+ """ + with open_text(package, resource, encoding, errors) as fp: + return fp.read() + + +@deprecated +def contents(package: Package) -> Iterable[str]: + """Return an iterable of entries in `package`. + + Note that not all entries are resources. Specifically, directories are + not considered resources. Use `is_resource()` on each entry returned here + to check if it is a resource or not. + """ + return [path.name for path in _common.files(package).iterdir()] + + +@deprecated +def is_resource(package: Package, name: str) -> bool: + """True if `name` is a resource inside `package`. + + Directories are *not* resources. + """ + resource = normalize_path(name) + return any( + traversable.name == resource and traversable.is_file() + for traversable in _common.files(package).iterdir() + ) + + +@deprecated +def path( + package: Package, + resource: Resource, +) -> ContextManager[pathlib.Path]: + """A context manager providing a file path object to the resource. + + If the resource does not already exist on its own on the file system, + a temporary file will be created. If the file was created, the file + will be deleted upon exiting the context manager (no exception is + raised if the file was deleted prior to the context manager + exiting). 
+ """ + return _common.as_file(_common.files(package) / normalize_path(resource)) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/abc.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/abc.py new file mode 100644 index 0000000000..23b6aeafe4 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/abc.py @@ -0,0 +1,170 @@ +import abc +import io +import itertools +import pathlib +from typing import Any, BinaryIO, Iterable, Iterator, NoReturn, Text, Optional + +from ._compat import runtime_checkable, Protocol, StrPath + + +__all__ = ["ResourceReader", "Traversable", "TraversableResources"] + + +class ResourceReader(metaclass=abc.ABCMeta): + """Abstract base class for loaders to provide resource reading support.""" + + @abc.abstractmethod + def open_resource(self, resource: Text) -> BinaryIO: + """Return an opened, file-like object for binary reading. + + The 'resource' argument is expected to represent only a file name. + If the resource cannot be found, FileNotFoundError is raised. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def resource_path(self, resource: Text) -> Text: + """Return the file system path to the specified resource. + + The 'resource' argument is expected to represent only a file name. + If the resource does not exist on the file system, raise + FileNotFoundError. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def is_resource(self, path: Text) -> bool: + """Return True if the named 'path' is a resource. + + Files are resources, directories are not. 
+ """ + raise FileNotFoundError + + @abc.abstractmethod + def contents(self) -> Iterable[str]: + """Return an iterable of entries in `package`.""" + raise FileNotFoundError + + +class TraversalError(Exception): + pass + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + + Any exceptions that occur when accessing the backing resource + may propagate unaltered. + """ + + @abc.abstractmethod + def iterdir(self) -> Iterator["Traversable"]: + """ + Yield Traversable objects in self + """ + + def read_bytes(self) -> bytes: + """ + Read contents of self as bytes + """ + with self.open('rb') as strm: + return strm.read() + + def read_text(self, encoding: Optional[str] = None) -> str: + """ + Read contents of self as text + """ + with self.open(encoding=encoding) as strm: + return strm.read() + + @abc.abstractmethod + def is_dir(self) -> bool: + """ + Return True if self is a directory + """ + + @abc.abstractmethod + def is_file(self) -> bool: + """ + Return True if self is a file + """ + + def joinpath(self, *descendants: StrPath) -> "Traversable": + """ + Return Traversable resolved with any descendants applied. + + Each descendant should be a path segment relative to self + and each may contain multiple levels separated by + ``posixpath.sep`` (``/``). 
+ """ + if not descendants: + return self + names = itertools.chain.from_iterable( + path.parts for path in map(pathlib.PurePosixPath, descendants) + ) + target = next(names) + matches = ( + traversable for traversable in self.iterdir() if traversable.name == target + ) + try: + match = next(matches) + except StopIteration: + raise TraversalError( + "Target not found during traversal.", target, list(names) + ) + return match.joinpath(*names) + + def __truediv__(self, child: StrPath) -> "Traversable": + """ + Return Traversable child in self + """ + return self.joinpath(child) + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + """ + The required interface for providing traversable + resources. 
+ """ + + @abc.abstractmethod + def files(self) -> "Traversable": + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource: StrPath) -> io.BufferedReader: + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource: Any) -> NoReturn: + raise FileNotFoundError(resource) + + def is_resource(self, path: StrPath) -> bool: + return self.files().joinpath(path).is_file() + + def contents(self) -> Iterator[str]: + return (item.name for item in self.files().iterdir()) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/readers.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/readers.py new file mode 100644 index 0000000000..ab34db7409 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/readers.py @@ -0,0 +1,120 @@ +import collections +import pathlib +import operator + +from . import abc + +from ._itertools import unique_everseen +from ._compat import ZipPath + + +def remove_duplicates(items): + return iter(collections.OrderedDict.fromkeys(items)) + + +class FileReader(abc.TraversableResources): + def __init__(self, loader): + self.path = pathlib.Path(loader.path).parent + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +class ZipReader(abc.TraversableResources): + def __init__(self, loader, module): + _, _, name = module.rpartition('.') + self.prefix = loader.prefix.replace('\\', '/') + name + '/' + self.archive = loader.archive + + def open_resource(self, resource): + try: + return super().open_resource(resource) + except KeyError as exc: + raise FileNotFoundError(exc.args[0]) + + def is_resource(self, path): + # workaround for `zipfile.Path.is_file` returning true + # for non-existent paths. 
+ target = self.files().joinpath(path) + return target.is_file() and target.exists() + + def files(self): + return ZipPath(self.archive, self.prefix) + + +class MultiplexedPath(abc.Traversable): + """ + Given a series of Traversable objects, implement a merged + version of the interface across all objects. Useful for + namespace packages which may be multihomed at a single + name. + """ + + def __init__(self, *paths): + self._paths = list(map(pathlib.Path, remove_duplicates(paths))) + if not self._paths: + message = 'MultiplexedPath must contain at least one path' + raise FileNotFoundError(message) + if not all(path.is_dir() for path in self._paths): + raise NotADirectoryError('MultiplexedPath only supports directories') + + def iterdir(self): + files = (file for path in self._paths for file in path.iterdir()) + return unique_everseen(files, key=operator.attrgetter('name')) + + def read_bytes(self): + raise FileNotFoundError(f'{self} is not a file') + + def read_text(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + def is_dir(self): + return True + + def is_file(self): + return False + + def joinpath(self, *descendants): + try: + return super().joinpath(*descendants) + except abc.TraversalError: + # One of the paths did not resolve (a directory does not exist). + # Just return something that will not exist. 
+ return self._paths[0].joinpath(*descendants) + + def open(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + @property + def name(self): + return self._paths[0].name + + def __repr__(self): + paths = ', '.join(f"'{path}'" for path in self._paths) + return f'MultiplexedPath({paths})' + + +class NamespaceReader(abc.TraversableResources): + def __init__(self, namespace_path): + if 'NamespacePath' not in str(namespace_path): + raise ValueError('Invalid path') + self.path = MultiplexedPath(*list(namespace_path)) + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path diff --git a/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/simple.py b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/simple.py new file mode 100644 index 0000000000..7770c922c8 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/importlib_resources/simple.py @@ -0,0 +1,106 @@ +""" +Interface adapters for low-level readers. +""" + +import abc +import io +import itertools +from typing import BinaryIO, List + +from .abc import Traversable, TraversableResources + + +class SimpleReader(abc.ABC): + """ + The minimum, low-level interface required from a resource + provider. + """ + + @property + @abc.abstractmethod + def package(self) -> str: + """ + The name of the package for which this reader loads resources. + """ + + @abc.abstractmethod + def children(self) -> List['SimpleReader']: + """ + Obtain an iterable of SimpleReader for available + child containers (e.g. directories). + """ + + @abc.abstractmethod + def resources(self) -> List[str]: + """ + Obtain available named resources for this virtual package. + """ + + @abc.abstractmethod + def open_binary(self, resource: str) -> BinaryIO: + """ + Obtain a File-like for a named resource. 
+ """ + + @property + def name(self): + return self.package.split('.')[-1] + + +class ResourceContainer(Traversable): + """ + Traversable container for a package's resources via its reader. + """ + + def __init__(self, reader: SimpleReader): + self.reader = reader + + def is_dir(self): + return True + + def is_file(self): + return False + + def iterdir(self): + files = (ResourceHandle(self, name) for name in self.reader.resources) + dirs = map(ResourceContainer, self.reader.children()) + return itertools.chain(files, dirs) + + def open(self, *args, **kwargs): + raise IsADirectoryError() + + +class ResourceHandle(Traversable): + """ + Handle to a named resource in a ResourceReader. + """ + + def __init__(self, parent: ResourceContainer, name: str): + self.parent = parent + self.name = name # type: ignore + + def is_file(self): + return True + + def is_dir(self): + return False + + def open(self, mode='r', *args, **kwargs): + stream = self.parent.reader.open_binary(self.name) + if 'b' not in mode: + stream = io.TextIOWrapper(*args, **kwargs) + return stream + + def joinpath(self, name): + raise RuntimeError("Cannot traverse into a resource") + + +class TraversableReader(TraversableResources, SimpleReader): + """ + A TraversableResources based on SimpleReader. Resource providers + may derive from this class to provide the TraversableResources + interface by supplying the SimpleReader interface. 
+ """ + + def files(self): + return ResourceContainer(self) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/jaraco/__init__.py b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/__init__.py diff --git a/third_party/python/setuptools/pkg_resources/_vendor/jaraco/context.py b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/context.py new file mode 100644 index 0000000000..b0d1ef37cb --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/context.py @@ -0,0 +1,288 @@ +import os +import subprocess +import contextlib +import functools +import tempfile +import shutil +import operator +import warnings + + +@contextlib.contextmanager +def pushd(dir): + """ + >>> tmp_path = getfixture('tmp_path') + >>> with pushd(tmp_path): + ... assert os.getcwd() == os.fspath(tmp_path) + >>> assert os.getcwd() != os.fspath(tmp_path) + """ + + orig = os.getcwd() + os.chdir(dir) + try: + yield dir + finally: + os.chdir(orig) + + +@contextlib.contextmanager +def tarball_context(url, target_dir=None, runner=None, pushd=pushd): + """ + Get a tarball, extract it, change to that directory, yield, then + clean up. + `runner` is the function to invoke commands. + `pushd` is a context manager for changing the directory. + """ + if target_dir is None: + target_dir = os.path.basename(url).replace('.tar.gz', '').replace('.tgz', '') + if runner is None: + runner = functools.partial(subprocess.check_call, shell=True) + else: + warnings.warn("runner parameter is deprecated", DeprecationWarning) + # In the tar command, use --strip-components=1 to strip the first path and + # then + # use -C to cause the files to be extracted to {target_dir}. This ensures + # that we always know where the files were extracted. 
+ runner('mkdir {target_dir}'.format(**vars())) + try: + getter = 'wget {url} -O -' + extract = 'tar x{compression} --strip-components=1 -C {target_dir}' + cmd = ' | '.join((getter, extract)) + runner(cmd.format(compression=infer_compression(url), **vars())) + with pushd(target_dir): + yield target_dir + finally: + runner('rm -Rf {target_dir}'.format(**vars())) + + +def infer_compression(url): + """ + Given a URL or filename, infer the compression code for tar. + + >>> infer_compression('http://foo/bar.tar.gz') + 'z' + >>> infer_compression('http://foo/bar.tgz') + 'z' + >>> infer_compression('file.bz') + 'j' + >>> infer_compression('file.xz') + 'J' + """ + # cheat and just assume it's the last two characters + compression_indicator = url[-2:] + mapping = dict(gz='z', bz='j', xz='J') + # Assume 'z' (gzip) if no match + return mapping.get(compression_indicator, 'z') + + +@contextlib.contextmanager +def temp_dir(remover=shutil.rmtree): + """ + Create a temporary directory context. Pass a custom remover + to override the removal behavior. + + >>> import pathlib + >>> with temp_dir() as the_dir: + ... assert os.path.isdir(the_dir) + ... _ = pathlib.Path(the_dir).joinpath('somefile').write_text('contents') + >>> assert not os.path.exists(the_dir) + """ + temp_dir = tempfile.mkdtemp() + try: + yield temp_dir + finally: + remover(temp_dir) + + +@contextlib.contextmanager +def repo_context(url, branch=None, quiet=True, dest_ctx=temp_dir): + """ + Check out the repo indicated by url. + + If dest_ctx is supplied, it should be a context manager + to yield the target directory for the check out. 
+ """ + exe = 'git' if 'git' in url else 'hg' + with dest_ctx() as repo_dir: + cmd = [exe, 'clone', url, repo_dir] + if branch: + cmd.extend(['--branch', branch]) + devnull = open(os.path.devnull, 'w') + stdout = devnull if quiet else None + subprocess.check_call(cmd, stdout=stdout) + yield repo_dir + + +@contextlib.contextmanager +def null(): + """ + A null context suitable to stand in for a meaningful context. + + >>> with null() as value: + ... assert value is None + """ + yield + + +class ExceptionTrap: + """ + A context manager that will catch certain exceptions and provide an + indication they occurred. + + >>> with ExceptionTrap() as trap: + ... raise Exception() + >>> bool(trap) + True + + >>> with ExceptionTrap() as trap: + ... pass + >>> bool(trap) + False + + >>> with ExceptionTrap(ValueError) as trap: + ... raise ValueError("1 + 1 is not 3") + >>> bool(trap) + True + >>> trap.value + ValueError('1 + 1 is not 3') + >>> trap.tb + <traceback object at ...> + + >>> with ExceptionTrap(ValueError) as trap: + ... raise Exception() + Traceback (most recent call last): + ... + Exception + + >>> bool(trap) + False + """ + + exc_info = None, None, None + + def __init__(self, exceptions=(Exception,)): + self.exceptions = exceptions + + def __enter__(self): + return self + + @property + def type(self): + return self.exc_info[0] + + @property + def value(self): + return self.exc_info[1] + + @property + def tb(self): + return self.exc_info[2] + + def __exit__(self, *exc_info): + type = exc_info[0] + matches = type and issubclass(type, self.exceptions) + if matches: + self.exc_info = exc_info + return matches + + def __bool__(self): + return bool(self.type) + + def raises(self, func, *, _test=bool): + """ + Wrap func and replace the result with the truth + value of the trap (True if an exception occurred). + + First, give the decorator an alias to support Python 3.8 + Syntax. 
+ + >>> raises = ExceptionTrap(ValueError).raises + + Now decorate a function that always fails. + + >>> @raises + ... def fail(): + ... raise ValueError('failed') + >>> fail() + True + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with ExceptionTrap(self.exceptions) as trap: + func(*args, **kwargs) + return _test(trap) + + return wrapper + + def passes(self, func): + """ + Wrap func and replace the result with the truth + value of the trap (True if no exception). + + First, give the decorator an alias to support Python 3.8 + Syntax. + + >>> passes = ExceptionTrap(ValueError).passes + + Now decorate a function that always fails. + + >>> @passes + ... def fail(): + ... raise ValueError('failed') + + >>> fail() + False + """ + return self.raises(func, _test=operator.not_) + + +class suppress(contextlib.suppress, contextlib.ContextDecorator): + """ + A version of contextlib.suppress with decorator support. + + >>> @suppress(KeyError) + ... def key_error(): + ... {}[''] + >>> key_error() + """ + + +class on_interrupt(contextlib.ContextDecorator): + """ + Replace a KeyboardInterrupt with SystemExit(1) + + >>> def do_interrupt(): + ... raise KeyboardInterrupt() + >>> on_interrupt('error')(do_interrupt)() + Traceback (most recent call last): + ... + SystemExit: 1 + >>> on_interrupt('error', code=255)(do_interrupt)() + Traceback (most recent call last): + ... + SystemExit: 255 + >>> on_interrupt('suppress')(do_interrupt)() + >>> with __import__('pytest').raises(KeyboardInterrupt): + ... 
on_interrupt('ignore')(do_interrupt)() + """ + + def __init__( + self, + action='error', + # py3.7 compat + # /, + code=1, + ): + self.action = action + self.code = code + + def __enter__(self): + return self + + def __exit__(self, exctype, excinst, exctb): + if exctype is not KeyboardInterrupt or self.action == 'ignore': + return + elif self.action == 'error': + raise SystemExit(self.code) from excinst + return self.action == 'suppress' diff --git a/third_party/python/setuptools/pkg_resources/_vendor/jaraco/functools.py b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/functools.py new file mode 100644 index 0000000000..67aeadc353 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/functools.py @@ -0,0 +1,556 @@ +import functools +import time +import inspect +import collections +import types +import itertools +import warnings + +import pkg_resources.extern.more_itertools + +from typing import Callable, TypeVar + + +CallableT = TypeVar("CallableT", bound=Callable[..., object]) + + +def compose(*funcs): + """ + Compose any number of unary functions into a single unary function. + + >>> import textwrap + >>> expected = str.strip(textwrap.dedent(compose.__doc__)) + >>> strip_and_dedent = compose(str.strip, textwrap.dedent) + >>> strip_and_dedent(compose.__doc__) == expected + True + + Compose also allows the innermost function to take arbitrary arguments. + + >>> round_three = lambda x: round(x, ndigits=3) + >>> f = compose(round_three, int.__truediv__) + >>> [f(3*x, x+1) for x in range(1,10)] + [1.5, 2.0, 2.25, 2.4, 2.5, 2.571, 2.625, 2.667, 2.7] + """ + + def compose_two(f1, f2): + return lambda *args, **kwargs: f1(f2(*args, **kwargs)) + + return functools.reduce(compose_two, funcs) + + +def method_caller(method_name, *args, **kwargs): + """ + Return a function that will call a named method on the + target object with optional positional and keyword + arguments. 
+ + >>> lower = method_caller('lower') + >>> lower('MyString') + 'mystring' + """ + + def call_method(target): + func = getattr(target, method_name) + return func(*args, **kwargs) + + return call_method + + +def once(func): + """ + Decorate func so it's only ever called the first time. + + This decorator can ensure that an expensive or non-idempotent function + will not be expensive on subsequent calls and is idempotent. + + >>> add_three = once(lambda a: a+3) + >>> add_three(3) + 6 + >>> add_three(9) + 6 + >>> add_three('12') + 6 + + To reset the stored value, simply clear the property ``saved_result``. + + >>> del add_three.saved_result + >>> add_three(9) + 12 + >>> add_three(8) + 12 + + Or invoke 'reset()' on it. + + >>> add_three.reset() + >>> add_three(-3) + 0 + >>> add_three(0) + 0 + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not hasattr(wrapper, 'saved_result'): + wrapper.saved_result = func(*args, **kwargs) + return wrapper.saved_result + + wrapper.reset = lambda: vars(wrapper).__delitem__('saved_result') + return wrapper + + +def method_cache( + method: CallableT, + cache_wrapper: Callable[ + [CallableT], CallableT + ] = functools.lru_cache(), # type: ignore[assignment] +) -> CallableT: + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... 
res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + + def wrapper(self: object, *args: object, **kwargs: object) -> object: + # it's the first call, replace the method with a cached, bound method + bound_method: CallableT = types.MethodType( # type: ignore[assignment] + method, self + ) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. 
+ wrapper.cache_clear = lambda: None # type: ignore[attr-defined] + + return ( # type: ignore[return-value] + _special_method_cache(method, cache_wrapper) or wrapper + ) + + +def _special_method_cache(method, cache_wrapper): + """ + Because Python treats special methods differently, it's not + possible to use instance attributes to implement the cached + methods. + + Instead, install the wrapper method under a different name + and return a simple proxy to that wrapper. + + https://github.com/jaraco/jaraco.functools/issues/5 + """ + name = method.__name__ + special_names = '__getattr__', '__getitem__' + if name not in special_names: + return + + wrapper_name = '__cached' + name + + def proxy(self, *args, **kwargs): + if wrapper_name not in vars(self): + bound = types.MethodType(method, self) + cache = cache_wrapper(bound) + setattr(self, wrapper_name, cache) + else: + cache = getattr(self, wrapper_name) + return cache(*args, **kwargs) + + return proxy + + +def apply(transform): + """ + Decorate a function with a transform function that is + invoked on results returned from the decorated function. + + >>> @apply(reversed) + ... def get_numbers(start): + ... "doc for get_numbers" + ... return range(start, start+3) + >>> list(get_numbers(4)) + [6, 5, 4] + >>> get_numbers.__doc__ + 'doc for get_numbers' + """ + + def wrap(func): + return functools.wraps(func)(compose(transform, func)) + + return wrap + + +def result_invoke(action): + r""" + Decorate a function with an action function that is + invoked on the results returned from the decorated + function (for its side-effect), then return the original + result. + + >>> @result_invoke(print) + ... def add_two(a, b): + ... 
return a + b + >>> x = add_two(2, 3) + 5 + >>> x + 5 + """ + + def wrap(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + action(result) + return result + + return wrapper + + return wrap + + +def invoke(f, *args, **kwargs): + """ + Call a function for its side effect after initialization. + + The benefit of using the decorator instead of simply invoking a function + after defining it is that it makes explicit the author's intent for the + function to be called immediately. Whereas if one simply calls the + function immediately, it's less obvious if that was intentional or + incidental. It also avoids repeating the name - the two actions, defining + the function and calling it immediately are modeled separately, but linked + by the decorator construct. + + The benefit of having a function construct (opposed to just invoking some + behavior inline) is to serve as a scope in which the behavior occurs. It + avoids polluting the global namespace with local variables, provides an + anchor on which to attach documentation (docstring), keeps the behavior + logically separated (instead of conceptually separated or not separated at + all), and provides potential to re-use the behavior for testing or other + purposes. + + This function is named as a pithy way to communicate, "call this function + primarily for its side effect", or "while defining this function, also + take it aside and call it". It exists because there's no Python construct + for "define and call" (nor should there be, as decorators serve this need + just fine). The behavior happens immediately and synchronously. + + >>> @invoke + ... def func(): print("called") + called + >>> func() + called + + Use functools.partial to pass parameters to the initial call + + >>> @functools.partial(invoke, name='bingo') + ... 
def func(name): print("called with", name) + called with bingo + """ + f(*args, **kwargs) + return f + + +def call_aside(*args, **kwargs): + """ + Deprecated name for invoke. + """ + warnings.warn("call_aside is deprecated, use invoke", DeprecationWarning) + return invoke(*args, **kwargs) + + +class Throttler: + """ + Rate-limit a function (or other callable) + """ + + def __init__(self, func, max_rate=float('Inf')): + if isinstance(func, Throttler): + func = func.func + self.func = func + self.max_rate = max_rate + self.reset() + + def reset(self): + self.last_called = 0 + + def __call__(self, *args, **kwargs): + self._wait() + return self.func(*args, **kwargs) + + def _wait(self): + "ensure at least 1/max_rate seconds from last call" + elapsed = time.time() - self.last_called + must_wait = 1 / self.max_rate - elapsed + time.sleep(max(0, must_wait)) + self.last_called = time.time() + + def __get__(self, obj, type=None): + return first_invoke(self._wait, functools.partial(self.func, obj)) + + +def first_invoke(func1, func2): + """ + Return a function that when invoked will invoke func1 without + any parameters (for its side-effect) and then invoke func2 + with whatever parameters were passed, returning its result. + """ + + def wrapper(*args, **kwargs): + func1() + return func2(*args, **kwargs) + + return wrapper + + +def retry_call(func, cleanup=lambda: None, retries=0, trap=()): + """ + Given a callable func, trap the indicated exceptions + for up to 'retries' times, invoking cleanup on the + exception. On the final attempt, allow any exceptions + to propagate. + """ + attempts = itertools.count() if retries == float('inf') else range(retries) + for attempt in attempts: + try: + return func() + except trap: + cleanup() + + return func() + + +def retry(*r_args, **r_kwargs): + """ + Decorator wrapper for retry_call. Accepts arguments to retry_call + except func and then returns a decorator for the decorated function. + + Ex: + + >>> @retry(retries=3) + ... 
def print_yielded(func):
    """
    Convert a generator into a function that prints all yielded elements

    >>> @print_yielded
    ... def x():
    ...     yield 3; yield None
    >>> x()
    3
    None
    """

    @functools.wraps(func)
    def print_results(*args, **kwargs):
        # Print every element as it is produced, then discard it.
        return more_itertools.consume(map(print, func(*args, **kwargs)))

    return print_results


def pass_none(func):
    """
    Wrap func so it's not called if its first param is None

    >>> print_text = pass_none(print)
    >>> print_text('text')
    text
    >>> print_text(None)
    """

    @functools.wraps(func)
    def wrapper(param, *args, **kwargs):
        if param is None:
            return None
        return func(param, *args, **kwargs)

    return wrapper


def assign_params(func, namespace):
    """
    Assign parameters from namespace where func solicits.

    Only the names that appear in func's signature are taken from
    namespace; every other key is ignored.

    >>> def func(x, y=3):
    ...     print(x, y)
    >>> assigned = assign_params(func, dict(x=2, z=4))
    >>> assigned()
    2 3

    The usual errors are raised if a function doesn't receive
    its required parameters:

    >>> assigned = assign_params(func, dict(y=3, z=4))
    >>> assigned()
    Traceback (most recent call last):
    TypeError: func() ...argument...

    It even works on methods:

    >>> class Handler:
    ...     def meth(self, arg):
    ...         print(arg)
    >>> assign_params(Handler().meth, dict(arg='crystal', foo='clear'))()
    crystal
    """
    accepted = inspect.signature(func).parameters
    matched = {name: namespace[name] for name in accepted if name in namespace}
    return functools.partial(func, **matched)
+ + >>> class MyClass: + ... @save_method_args + ... def method(self, a, b): + ... print(a, b) + >>> my_ob = MyClass() + >>> my_ob.method(1, 2) + 1 2 + >>> my_ob._saved_method.args + (1, 2) + >>> my_ob._saved_method.kwargs + {} + >>> my_ob.method(a=3, b='foo') + 3 foo + >>> my_ob._saved_method.args + () + >>> my_ob._saved_method.kwargs == dict(a=3, b='foo') + True + + The arguments are stored on the instance, allowing for + different instance to save different args. + + >>> your_ob = MyClass() + >>> your_ob.method({str('x'): 3}, b=[4]) + {'x': 3} [4] + >>> your_ob._saved_method.args + ({'x': 3},) + >>> my_ob._saved_method.args + () + """ + args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs') + + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + attr_name = '_saved_' + method.__name__ + attr = args_and_kwargs(args, kwargs) + setattr(self, attr_name, attr) + return method(self, *args, **kwargs) + + return wrapper + + +def except_(*exceptions, replace=None, use=None): + """ + Replace the indicated exceptions, if raised, with the indicated + literal replacement or evaluated expression (if present). + + >>> safe_int = except_(ValueError)(int) + >>> safe_int('five') + >>> safe_int('5') + 5 + + Specify a literal replacement with ``replace``. + + >>> safe_int_r = except_(ValueError, replace=0)(int) + >>> safe_int_r('five') + 0 + + Provide an expression to ``use`` to pass through particular parameters. 
+ + >>> safe_int_pt = except_(ValueError, use='args[0]')(int) + >>> safe_int_pt('five') + 'five' + + """ + + def decorate(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except exceptions: + try: + return eval(use) + except TypeError: + return replace + + return wrapper + + return decorate diff --git a/third_party/python/setuptools/pkg_resources/_vendor/jaraco/text/__init__.py b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/text/__init__.py new file mode 100644 index 0000000000..c466378ceb --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/jaraco/text/__init__.py @@ -0,0 +1,599 @@ +import re +import itertools +import textwrap +import functools + +try: + from importlib.resources import files # type: ignore +except ImportError: # pragma: nocover + from pkg_resources.extern.importlib_resources import files # type: ignore + +from pkg_resources.extern.jaraco.functools import compose, method_cache +from pkg_resources.extern.jaraco.context import ExceptionTrap + + +def substitution(old, new): + """ + Return a function that will perform a substitution on a string + """ + return lambda s: s.replace(old, new) + + +def multi_substitution(*substitutions): + """ + Take a sequence of pairs specifying substitutions, and create + a function that performs those substitutions. + + >>> multi_substitution(('foo', 'bar'), ('bar', 'baz'))('foo') + 'baz' + """ + substitutions = itertools.starmap(substitution, substitutions) + # compose function applies last function first, so reverse the + # substitutions to get the expected order. + substitutions = reversed(tuple(substitutions)) + return compose(*substitutions) + + +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. 
+ + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use ``in_``: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. 
+ @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) + + +# Python 3.8 compatibility +_unicode_trap = ExceptionTrap(UnicodeDecodeError) + + +@_unicode_trap.passes +def is_decodable(value): + r""" + Return True if the supplied value is decodable (using the default + encoding). + + >>> is_decodable(b'\xff') + False + >>> is_decodable(b'\x32') + True + """ + value.decode() + + +def is_binary(value): + r""" + Return True if the value appears to be binary (that is, it's a byte + string and isn't decodable). + + >>> is_binary(b'\xff') + True + >>> is_binary('\xff') + False + """ + return isinstance(value, bytes) and not is_decodable(value) + + +def trim(s): + r""" + Trim something like a docstring to remove the whitespace that + is common due to indentation and formatting. + + >>> trim("\n\tfoo = bar\n\t\tbar = baz\n") + 'foo = bar\n\tbar = baz' + """ + return textwrap.dedent(s).strip() + + +def wrap(s): + """ + Wrap lines of text, retaining existing newlines as + paragraph markers. + + >>> print(wrap(lorem_ipsum)) + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do + eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad + minim veniam, quis nostrud exercitation ullamco laboris nisi ut + aliquip ex ea commodo consequat. Duis aute irure dolor in + reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla + pariatur. Excepteur sint occaecat cupidatat non proident, sunt in + culpa qui officia deserunt mollit anim id est laborum. + <BLANKLINE> + Curabitur pretium tincidunt lacus. Nulla gravida orci a odio. Nullam + varius, turpis et commodo pharetra, est eros bibendum elit, nec luctus + magna felis sollicitudin mauris. Integer in mauris eu nibh euismod + gravida. Duis ac tellus et risus vulputate vehicula. 
def unwrap(s):
    r"""
    Given a multi-line string, return an unwrapped version.

    Runs of two or more newlines separate paragraphs. Within each
    paragraph every newline becomes a space, and the paragraphs are
    rejoined with a single newline.

    >>> unwrap('one\ntwo\n\nthree\nfour')
    'one two\nthree four'
    """
    joined = [
        paragraph.replace('\n', ' ') for paragraph in re.split(r'\n\n+', s)
    ]
    return '\n'.join(joined)


class Splitter(object):
    """object that will split a string with the given arguments for each call

    >>> s = Splitter(',')
    >>> s('hello, world, this is your, master calling')
    ['hello', ' world', ' this is your', ' master calling']
    """

    def __init__(self, *args):
        # Positional arguments are stored and handed to str.split on
        # every call.
        self.args = args

    def __call__(self, s):
        split_args = self.args
        return s.split(*split_args)


def indent(string, prefix=' ' * 4):
    """
    Return string with prefix (four spaces by default) placed in front.

    >>> indent('foo')
    '    foo'
    """
    return ''.join((prefix, string))
+ + >>> WordSet.parse("camelCase") + ('camel', 'Case') + + >>> WordSet.parse("under_sep") + ('under', 'sep') + + Acronyms should be retained + + >>> WordSet.parse("firstSNL") + ('first', 'SNL') + + >>> WordSet.parse("you_and_I") + ('you', 'and', 'I') + + >>> WordSet.parse("A simple test") + ('A', 'simple', 'test') + + Multiple caps should not interfere with the first cap of another word. + + >>> WordSet.parse("myABCClass") + ('my', 'ABC', 'Class') + + The result is a WordSet, so you can get the form you need. + + >>> WordSet.parse("myABCClass").underscore_separated() + 'my_ABC_Class' + + >>> WordSet.parse('a-command').camel_case() + 'ACommand' + + >>> WordSet.parse('someIdentifier').lowered().space_separated() + 'some identifier' + + Slices of the result should return another WordSet. + + >>> WordSet.parse('taken-out-of-context')[1:].underscore_separated() + 'out_of_context' + + >>> WordSet.from_class_name(WordSet()).lowered().space_separated() + 'word set' + + >>> example = WordSet.parse('figured it out') + >>> example.headless_camel_case() + 'figuredItOut' + >>> example.dash_separated() + 'figured-it-out' + + """ + + _pattern = re.compile('([A-Z]?[a-z]+)|([A-Z]+(?![a-z]))') + + def capitalized(self): + return WordSet(word.capitalize() for word in self) + + def lowered(self): + return WordSet(word.lower() for word in self) + + def camel_case(self): + return ''.join(self.capitalized()) + + def headless_camel_case(self): + words = iter(self) + first = next(words).lower() + new_words = itertools.chain((first,), WordSet(words).camel_case()) + return ''.join(new_words) + + def underscore_separated(self): + return '_'.join(self) + + def dash_separated(self): + return '-'.join(self) + + def space_separated(self): + return ' '.join(self) + + def trim_right(self, item): + """ + Remove the item from the end of the set. 
+ + >>> WordSet.parse('foo bar').trim_right('foo') + ('foo', 'bar') + >>> WordSet.parse('foo bar').trim_right('bar') + ('foo',) + >>> WordSet.parse('').trim_right('bar') + () + """ + return self[:-1] if self and self[-1] == item else self + + def trim_left(self, item): + """ + Remove the item from the beginning of the set. + + >>> WordSet.parse('foo bar').trim_left('foo') + ('bar',) + >>> WordSet.parse('foo bar').trim_left('bar') + ('foo', 'bar') + >>> WordSet.parse('').trim_left('bar') + () + """ + return self[1:] if self and self[0] == item else self + + def trim(self, item): + """ + >>> WordSet.parse('foo bar').trim('foo') + ('bar',) + """ + return self.trim_left(item).trim_right(item) + + def __getitem__(self, item): + result = super(WordSet, self).__getitem__(item) + if isinstance(item, slice): + result = WordSet(result) + return result + + @classmethod + def parse(cls, identifier): + matches = cls._pattern.finditer(identifier) + return WordSet(match.group(0) for match in matches) + + @classmethod + def from_class_name(cls, subject): + return cls.parse(subject.__class__.__name__) + + +# for backward compatibility +words = WordSet.parse + + +def simple_html_strip(s): + r""" + Remove HTML from the string `s`. + + >>> str(simple_html_strip('')) + '' + + >>> print(simple_html_strip('A <bold>stormy</bold> day in paradise')) + A stormy day in paradise + + >>> print(simple_html_strip('Somebody <!-- do not --> tell the truth.')) + Somebody tell the truth. + + >>> print(simple_html_strip('What about<br/>\nmultiple lines?')) + What about + multiple lines? + """ + html_stripper = re.compile('(<!--.*?-->)|(<[^>]*>)|([^<]+)', re.DOTALL) + texts = (match.group(3) or '' for match in html_stripper.finditer(s)) + return ''.join(texts) + + +class SeparatedValues(str): + """ + A string separated by a separator. Overrides __iter__ for getting + the values. + + >>> list(SeparatedValues('a,b,c')) + ['a', 'b', 'c'] + + Whitespace is stripped and empty values are discarded. 
class Stripper:
    r"""
    Given a series of lines, find the common prefix and strip it from them.

    >>> lines = [
    ...     'abcdefg\n',
    ...     'abc\n',
    ...     'abcde\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    'abc'
    >>> list(res.lines)
    ['defg\n', '\n', 'de\n']

    If no prefix is common, nothing should be stripped.

    >>> lines = [
    ...     'abcd\n',
    ...     '1234\n',
    ... ]
    >>> res = Stripper.strip_prefix(lines)
    >>> res.prefix
    ''
    >>> list(res.lines)
    ['abcd\n', '1234\n']
    """

    def __init__(self, prefix, lines):
        self.prefix = prefix
        # Lazy: each line is stripped only when self.lines is iterated.
        self.lines = map(self, lines)

    @classmethod
    def strip_prefix(cls, lines):
        """Build a Stripper from the prefix common to every line."""
        # tee so the input can be scanned for the prefix and still be
        # handed, unconsumed, to the new instance.
        prefix_lines, lines = itertools.tee(lines)
        prefix = functools.reduce(cls.common_prefix, prefix_lines)
        return cls(prefix, lines)

    def __call__(self, line):
        """Return the part of line that follows the prefix."""
        if not self.prefix:
            return line
        return line.partition(self.prefix)[2]

    @staticmethod
    def common_prefix(s1, s2):
        """
        Return the common prefix of two lines.
        """
        index = min(len(s1), len(s2))
        while s1[:index] != s2[:index]:
            index -= 1
        return s1[:index]


def remove_prefix(text, prefix):
    """
    Remove the prefix from the text if it exists.

    Only a leading occurrence is removed; the same substring elsewhere in
    the text is left alone. An empty prefix returns the text unchanged.

    >>> remove_prefix('underwhelming performance', 'underwhelming ')
    'performance'

    >>> remove_prefix('something special', 'sample')
    'something special'

    >>> remove_prefix('abcabc', 'abc')
    'abc'

    >>> remove_prefix('xabc', 'abc')
    'xabc'
    """
    if prefix and text.startswith(prefix):
        return text[len(prefix):]
    return text


def remove_suffix(text, suffix):
    """
    Remove the suffix from the text if it exists.

    Only a trailing occurrence is removed; the same substring elsewhere in
    the text is left alone. An empty suffix returns the text unchanged.

    >>> remove_suffix('name.git', '.git')
    'name'

    >>> remove_suffix('something special', 'sample')
    'something special'

    >>> remove_suffix('repo.git.git', '.git')
    'repo.git'

    >>> remove_suffix('my.github.io', '.git')
    'my.github.io'
    """
    if suffix and text.endswith(suffix):
        return text[: -len(suffix)]
    return text


def normalize_newlines(text):
    r"""
    Replace alternate newlines with the canonical newline.

    >>> normalize_newlines('Lorem Ipsum\u2029')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\r\n')
    'Lorem Ipsum\n'
    >>> normalize_newlines('Lorem Ipsum\x85')
    'Lorem Ipsum\n'
    """
    # '\r\n' is listed before '\r' so the pair is replaced as one newline.
    newlines = ['\r\n', '\r', '\n', '\u0085', '\u2028', '\u2029']
    pattern = '|'.join(newlines)
    return re.sub(pattern, '\n', text)


def _nonblank(line):
    # Truthy only for a non-empty line that does not start with '#'.
    return line and not line.startswith('#')


@functools.singledispatch
def yield_lines(iterable):
    r"""
    Yield valid lines of a string or iterable.

    Lines are stripped; blank lines and lines starting with ``#`` are
    dropped. Iterables are flattened recursively.

    >>> list(yield_lines(''))
    []
    >>> list(yield_lines(['foo', 'bar']))
    ['foo', 'bar']
    >>> list(yield_lines('foo\nbar'))
    ['foo', 'bar']
    >>> list(yield_lines('\nfoo\n#bar\nbaz #comment'))
    ['foo', 'baz #comment']
    >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n']))
    ['foo', 'bar', 'baz', 'bing']
    """
    return itertools.chain.from_iterable(map(yield_lines, iterable))


@yield_lines.register(str)
def _(text):
    # str overload: split into lines, strip each, drop blanks and comments.
    return filter(_nonblank, map(str.strip, text.splitlines()))


def drop_comment(line):
    """
    Drop comments.

    >>> drop_comment('foo # bar')
    'foo'

    A hash without a space may be in a URL.

    >>> drop_comment('http://example.com/foo#bar')
    'http://example.com/foo#bar'
    """
    return line.partition(' #')[0]
+ + >>> list(join_continuation(['goo\\', 'dly'])) + ['godly'] + + A terrible idea, but... + If no line is available to continue, suppress the lines. + + >>> list(join_continuation(['foo', 'bar\\', 'baz\\'])) + ['foo'] + """ + lines = iter(lines) + for item in lines: + while item.endswith('\\'): + try: + item = item[:-2].strip() + next(lines) + except StopIteration: + return + yield item diff --git a/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/__init__.py b/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/__init__.py new file mode 100644 index 0000000000..66443971df --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/__init__.py @@ -0,0 +1,6 @@ +"""More routines for operating on iterables, beyond itertools""" + +from .more import * # noqa +from .recipes import * # noqa + +__version__ = '9.1.0' diff --git a/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/more.py b/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/more.py new file mode 100644 index 0000000000..e0e2d3de92 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/more.py @@ -0,0 +1,4391 @@ +import warnings + +from collections import Counter, defaultdict, deque, abc +from collections.abc import Sequence +from functools import partial, reduce, wraps +from heapq import heapify, heapreplace, heappop +from itertools import ( + chain, + compress, + count, + cycle, + dropwhile, + groupby, + islice, + repeat, + starmap, + takewhile, + tee, + zip_longest, +) +from math import exp, factorial, floor, log +from queue import Empty, Queue +from random import random, randrange, uniform +from operator import itemgetter, mul, sub, gt, lt, ge, le +from sys import hexversion, maxsize +from time import monotonic + +from .recipes import ( + _marker, + _zip_equal, + UnequalIterablesError, + consume, + flatten, + pairwise, + powerset, + take, + unique_everseen, + all_equal, +) + +__all__ 
= [ + 'AbortThread', + 'SequenceView', + 'UnequalIterablesError', + 'adjacent', + 'all_unique', + 'always_iterable', + 'always_reversible', + 'bucket', + 'callback_iter', + 'chunked', + 'chunked_even', + 'circular_shifts', + 'collapse', + 'combination_index', + 'consecutive_groups', + 'constrained_batches', + 'consumer', + 'count_cycle', + 'countable', + 'difference', + 'distinct_combinations', + 'distinct_permutations', + 'distribute', + 'divide', + 'duplicates_everseen', + 'duplicates_justseen', + 'exactly_n', + 'filter_except', + 'first', + 'gray_product', + 'groupby_transform', + 'ichunked', + 'iequals', + 'ilen', + 'interleave', + 'interleave_evenly', + 'interleave_longest', + 'intersperse', + 'is_sorted', + 'islice_extended', + 'iterate', + 'last', + 'locate', + 'longest_common_prefix', + 'lstrip', + 'make_decorator', + 'map_except', + 'map_if', + 'map_reduce', + 'mark_ends', + 'minmax', + 'nth_or_last', + 'nth_permutation', + 'nth_product', + 'numeric_range', + 'one', + 'only', + 'padded', + 'partitions', + 'peekable', + 'permutation_index', + 'product_index', + 'raise_', + 'repeat_each', + 'repeat_last', + 'replace', + 'rlocate', + 'rstrip', + 'run_length', + 'sample', + 'seekable', + 'set_partitions', + 'side_effect', + 'sliced', + 'sort_together', + 'split_after', + 'split_at', + 'split_before', + 'split_into', + 'split_when', + 'spy', + 'stagger', + 'strip', + 'strictly_n', + 'substrings', + 'substrings_indexes', + 'time_limited', + 'unique_in_window', + 'unique_to_each', + 'unzip', + 'value_chain', + 'windowed', + 'windowed_complete', + 'with_iter', + 'zip_broadcast', + 'zip_equal', + 'zip_offset', +] + + +def chunked(iterable, n, strict=False): + """Break *iterable* into lists of length *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6], 3)) + [[1, 2, 3], [4, 5, 6]] + + By the default, the last yielded list will have fewer than *n* elements + if the length of *iterable* is not divisible by *n*: + + >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3)) + [[1, 2, 3], 
[4, 5, 6], [7, 8]] + + To use a fill-in value instead, see the :func:`grouper` recipe. + + If the length of *iterable* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + list is yielded. + + """ + iterator = iter(partial(take, n, iter(iterable)), []) + if strict: + if n is None: + raise ValueError('n must not be None when using strict mode.') + + def ret(): + for chunk in iterator: + if len(chunk) != n: + raise ValueError('iterable is not divisible by n.') + yield chunk + + return iter(ret()) + else: + return iterator + + +def first(iterable, default=_marker): + """Return the first item of *iterable*, or *default* if *iterable* is + empty. + + >>> first([0, 1, 2, 3]) + 0 + >>> first([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + + :func:`first` is useful when you have a generator of expensive-to-retrieve + values and want any arbitrary one. It is marginally shorter than + ``next(iter(iterable), default)``. + + """ + try: + return next(iter(iterable)) + except StopIteration as e: + if default is _marker: + raise ValueError( + 'first() was called on an empty iterable, and no ' + 'default value was provided.' + ) from e + return default + + +def last(iterable, default=_marker): + """Return the last item of *iterable*, or *default* if *iterable* is + empty. + + >>> last([0, 1, 2, 3]) + 3 + >>> last([], 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. 
+ """ + try: + if isinstance(iterable, Sequence): + return iterable[-1] + # Work around https://bugs.python.org/issue38525 + elif hasattr(iterable, '__reversed__') and (hexversion != 0x030800F0): + return next(reversed(iterable)) + else: + return deque(iterable, maxlen=1)[-1] + except (IndexError, TypeError, StopIteration): + if default is _marker: + raise ValueError( + 'last() was called on an empty iterable, and no default was ' + 'provided.' + ) + return default + + +def nth_or_last(iterable, n, default=_marker): + """Return the nth or the last item of *iterable*, + or *default* if *iterable* is empty. + + >>> nth_or_last([0, 1, 2, 3], 2) + 2 + >>> nth_or_last([0, 1], 2) + 1 + >>> nth_or_last([], 0, 'some default') + 'some default' + + If *default* is not provided and there are no items in the iterable, + raise ``ValueError``. + """ + return last(islice(iterable, n + 1), default=default) + + +class peekable: + """Wrap an iterator to allow lookahead and prepending elements. + + Call :meth:`peek` on the result to get the value that will be returned + by :func:`next`. This won't advance the iterator: + + >>> p = peekable(['a', 'b']) + >>> p.peek() + 'a' + >>> next(p) + 'a' + + Pass :meth:`peek` a default value to return that instead of raising + ``StopIteration`` when the iterator is exhausted. + + >>> p = peekable([]) + >>> p.peek('hi') + 'hi' + + peekables also offer a :meth:`prepend` method, which "inserts" items + at the head of the iterable: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> p.peek() + 11 + >>> list(p) + [11, 12, 1, 2, 3] + + peekables can be indexed. Index 0 is the item that will be returned by + :func:`next`, index 1 is the item after that, and so on: + The values up to the given index will be cached. 
+ + >>> p = peekable(['a', 'b', 'c', 'd']) + >>> p[0] + 'a' + >>> p[1] + 'b' + >>> next(p) + 'a' + + Negative indexes are supported, but be aware that they will cache the + remaining items in the source iterator, which may require significant + storage. + + To check whether a peekable is exhausted, check its truth value: + + >>> p = peekable(['a', 'b']) + >>> if p: # peekable has items + ... list(p) + ['a', 'b'] + >>> if not p: # peekable is exhausted + ... list(p) + [] + + """ + + def __init__(self, iterable): + self._it = iter(iterable) + self._cache = deque() + + def __iter__(self): + return self + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + """Return the item that will be next returned from ``next()``. + + Return ``default`` if there are no items left. If ``default`` is not + provided, raise ``StopIteration``. + + """ + if not self._cache: + try: + self._cache.append(next(self._it)) + except StopIteration: + if default is _marker: + raise + return default + return self._cache[0] + + def prepend(self, *items): + """Stack up items to be the next ones returned from ``next()`` or + ``self.peek()``. The items will be returned in + first in, first out order:: + + >>> p = peekable([1, 2, 3]) + >>> p.prepend(10, 11, 12) + >>> next(p) + 10 + >>> list(p) + [11, 12, 1, 2, 3] + + It is possible, by prepending items, to "resurrect" a peekable that + previously raised ``StopIteration``. + + >>> p = peekable([]) + >>> next(p) + Traceback (most recent call last): + ... + StopIteration + >>> p.prepend(1) + >>> next(p) + 1 + >>> next(p) + Traceback (most recent call last): + ... 
+ StopIteration + + """ + self._cache.extendleft(reversed(items)) + + def __next__(self): + if self._cache: + return self._cache.popleft() + + return next(self._it) + + def _get_slice(self, index): + # Normalize the slice's arguments + step = 1 if (index.step is None) else index.step + if step > 0: + start = 0 if (index.start is None) else index.start + stop = maxsize if (index.stop is None) else index.stop + elif step < 0: + start = -1 if (index.start is None) else index.start + stop = (-maxsize - 1) if (index.stop is None) else index.stop + else: + raise ValueError('slice step cannot be zero') + + # If either the start or stop index is negative, we'll need to cache + # the rest of the iterable in order to slice from the right side. + if (start < 0) or (stop < 0): + self._cache.extend(self._it) + # Otherwise we'll need to find the rightmost index and cache to that + # point. + else: + n = min(max(start, stop) + 1, maxsize) + cache_len = len(self._cache) + if n >= cache_len: + self._cache.extend(islice(self._it, n - cache_len)) + + return list(self._cache)[index] + + def __getitem__(self, index): + if isinstance(index, slice): + return self._get_slice(index) + + cache_len = len(self._cache) + if index < 0: + self._cache.extend(self._it) + elif index >= cache_len: + self._cache.extend(islice(self._it, index + 1 - cache_len)) + + return self._cache[index] + + +def consumer(func): + """Decorator that automatically advances a PEP-342-style "reverse iterator" + to its first yield point so you don't have to call ``next()`` on it + manually. + + >>> @consumer + ... def tally(): + ... i = 0 + ... while True: + ... print('Thing number %s is %s.' % (i, (yield))) + ... i += 1 + ... + >>> t = tally() + >>> t.send('red') + Thing number 0 is red. + >>> t.send('fish') + Thing number 1 is fish. + + Without the decorator, you would have to call ``next(t)`` before + ``t.send()`` could be used. 
def ilen(iterable):
    """Return the number of items in *iterable*.

        >>> ilen(x for x in range(1000000) if x % 3 == 0)
        333334

    This consumes the iterable, so handle with care.

    """
    # Pair every item with a running counter and discard the pairs in a
    # zero-length deque; the next value drawn from the counter is the
    # number of pairs that were produced. The approach is kept as-is
    # because it was chosen on the basis of benchmarks.
    tally = count()
    deque(zip(iterable, tally), maxlen=0)
    return next(tally)


def iterate(func, start):
    """Return ``start``, ``func(start)``, ``func(func(start))``, ...

    >>> from itertools import islice
    >>> list(islice(iterate(lambda x: 2*x, 1), 10))
    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    """
    value = start
    while True:
        yield value
        value = func(value)


def with_iter(context_manager):
    """Wrap an iterable in a ``with`` statement, so it closes once exhausted.

    For example, this will close the file when the iterator is exhausted::

        upper_lines = (line.upper() for line in with_iter(open('foo')))

    Any context manager which returns an iterable is a candidate for
    ``with_iter``.

    """
    with context_manager as entered:
        yield from entered


def one(iterable, too_short=None, too_long=None):
    """Return the first item from *iterable*, which is expected to contain only
    that item. Raise an exception if *iterable* is empty or has more than one
    item.

    :func:`one` is useful for ensuring that an iterable contains only one item.
    For example, it can be used to retrieve the result of a database query
    that is expected to return a single row.

    If *iterable* is empty, ``ValueError`` will be raised. You may specify a
    different exception with the *too_short* keyword:

        >>> it = []
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: too few items in iterable (expected 1)
        >>> too_short = IndexError('too few items')
        >>> one(it, too_short=too_short)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        IndexError: too few items

    Similarly, if *iterable* contains more than one item, ``ValueError`` will
    be raised. You may specify a different exception with the *too_long*
    keyword:

        >>> it = ['too', 'many']
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: Expected exactly one item in iterable, but got 'too',
        'many', and perhaps more.
        >>> too_long = RuntimeError
        >>> one(it, too_long=too_long)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        RuntimeError

    Note that :func:`one` attempts to advance *iterable* twice to ensure there
    is only one item. See :func:`spy` or :func:`peekable` to check iterable
    contents less destructively.

    """
    items = iter(iterable)

    try:
        only_item = next(items)
    except StopIteration as exc:
        raise (
            too_short or ValueError('too few items in iterable (expected 1)')
        ) from exc

    try:
        extra_item = next(items)
    except StopIteration:
        # Exactly one item was available.
        return only_item

    msg = (
        'Expected exactly one item in iterable, but got {!r}, {!r}, '
        'and perhaps more.'.format(only_item, extra_item)
    )
    raise too_long or ValueError(msg)


def raise_(exception, *args):
    """Raise *exception* instantiated with *args*."""
    raise exception(*args)
+ + >>> list(strictly_n('ab', 3)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too few items in iterable (got 2) + + >>> list(strictly_n('abc', 2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: too many items in iterable (got at least 3) + + You can instead supply functions that do something else. + *too_short* will be called with the number of items in *iterable*. + *too_long* will be called with `n + 1`. + + >>> def too_short(item_count): + ... raise RuntimeError + >>> it = strictly_n('abcd', 6, too_short=too_short) + >>> list(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + RuntimeError + + >>> def too_long(item_count): + ... print('The boss is going to hear about this') + >>> it = strictly_n('abcdef', 4, too_long=too_long) + >>> list(it) + The boss is going to hear about this + ['a', 'b', 'c', 'd'] + + """ + if too_short is None: + too_short = lambda item_count: raise_( + ValueError, + 'Too few items in iterable (got {})'.format(item_count), + ) + + if too_long is None: + too_long = lambda item_count: raise_( + ValueError, + 'Too many items in iterable (got at least {})'.format(item_count), + ) + + it = iter(iterable) + for i in range(n): + try: + item = next(it) + except StopIteration: + too_short(i) + return + else: + yield item + + try: + next(it) + except StopIteration: + pass + else: + too_long(n + 1) + + +def distinct_permutations(iterable, r=None): + """Yield successive distinct permutations of the elements in *iterable*. + + >>> sorted(distinct_permutations([1, 0, 1])) + [(0, 1, 1), (1, 0, 1), (1, 1, 0)] + + Equivalent to ``set(permutations(iterable))``, except duplicates are not + generated and thrown away. For larger input sequences this is much more + efficient. + + Duplicate permutations arise when there are duplicated elements in the + input iterable. The number of items returned is + `n! / (x_1! * x_2! * ... 
* x_n!)`, where `n` is the total number of + items input, and each `x_i` is the count of a distinct item in the input + sequence. + + If *r* is given, only the *r*-length permutations are yielded. + + >>> sorted(distinct_permutations([1, 0, 1], r=2)) + [(0, 1), (1, 0), (1, 1)] + >>> sorted(distinct_permutations(range(3), r=2)) + [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + + """ + + # Algorithm: https://w.wiki/Qai + def _full(A): + while True: + # Yield the permutation we have + yield tuple(A) + + # Find the largest index i such that A[i] < A[i + 1] + for i in range(size - 2, -1, -1): + if A[i] < A[i + 1]: + break + # If no such index exists, this permutation is the last one + else: + return + + # Find the largest index j greater than j such that A[i] < A[j] + for j in range(size - 1, i, -1): + if A[i] < A[j]: + break + + # Swap the value of A[i] with that of A[j], then reverse the + # sequence from A[i + 1] to form the new permutation + A[i], A[j] = A[j], A[i] + A[i + 1 :] = A[: i - size : -1] # A[i + 1:][::-1] + + # Algorithm: modified from the above + def _partial(A, r): + # Split A into the first r items and the last r items + head, tail = A[:r], A[r:] + right_head_indexes = range(r - 1, -1, -1) + left_tail_indexes = range(len(tail)) + + while True: + # Yield the permutation we have + yield tuple(head) + + # Starting from the right, find the first index of the head with + # value smaller than the maximum value of the tail - call it i. + pivot = tail[-1] + for i in right_head_indexes: + if head[i] < pivot: + break + pivot = head[i] + else: + return + + # Starting from the left, find the first value of the tail + # with a value greater than head[i] and swap. + for j in left_tail_indexes: + if tail[j] > head[i]: + head[i], tail[j] = tail[j], head[i] + break + # If we didn't find one, start from the right and find the first + # index of the head with a value greater than head[i] and swap. 
+ else: + for j in right_head_indexes: + if head[j] > head[i]: + head[i], head[j] = head[j], head[i] + break + + # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)] + tail += head[: i - r : -1] # head[i + 1:][::-1] + i += 1 + head[i:], tail[:] = tail[: r - i], tail[r - i :] + + items = sorted(iterable) + + size = len(items) + if r is None: + r = size + + if 0 < r <= size: + return _full(items) if (r == size) else _partial(items, r) + + return iter(() if r else ((),)) + + +def intersperse(e, iterable, n=1): + """Intersperse filler element *e* among the items in *iterable*, leaving + *n* items between each filler element. + + >>> list(intersperse('!', [1, 2, 3, 4, 5])) + [1, '!', 2, '!', 3, '!', 4, '!', 5] + + >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2)) + [1, 2, None, 3, 4, None, 5] + + """ + if n == 0: + raise ValueError('n must be > 0') + elif n == 1: + # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2... + # islice(..., 1, None) -> x_0, e, x_1, e, x_2... + return islice(interleave(repeat(e), iterable), 1, None) + else: + # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]... + # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]... + # flatten(...) -> x_0, x_1, e, x_2, x_3... + filler = repeat([e]) + chunks = chunked(iterable, n) + return flatten(islice(interleave(filler, chunks), 1, None)) + + +def unique_to_each(*iterables): + """Return the elements from each of the input iterables that aren't in the + other input iterables. + + For example, suppose you have a set of packages, each with a set of + dependencies:: + + {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}} + + If you remove one package, which dependencies can also be removed? + + If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not + associated with ``pkg_2`` or ``pkg_3``. 
Similarly, ``C`` is only needed for + ``pkg_2``, and ``D`` is only needed for ``pkg_3``:: + + >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'}) + [['A'], ['C'], ['D']] + + If there are duplicates in one input iterable that aren't in the others + they will be duplicated in the output. Input order is preserved:: + + >>> unique_to_each("mississippi", "missouri") + [['p', 'p'], ['o', 'u', 'r']] + + It is assumed that the elements of each iterable are hashable. + + """ + pool = [list(it) for it in iterables] + counts = Counter(chain.from_iterable(map(set, pool))) + uniques = {element for element in counts if counts[element] == 1} + return [list(filter(uniques.__contains__, it)) for it in pool] + + +def windowed(seq, n, fillvalue=None, step=1): + """Return a sliding window of width *n* over the given iterable. + + >>> all_windows = windowed([1, 2, 3, 4, 5], 3) + >>> list(all_windows) + [(1, 2, 3), (2, 3, 4), (3, 4, 5)] + + When the window is larger than the iterable, *fillvalue* is used in place + of missing values: + + >>> list(windowed([1, 2, 3], 4)) + [(1, 2, 3, None)] + + Each window will advance in increments of *step*: + + >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2)) + [(1, 2, 3), (3, 4, 5), (5, 6, '!')] + + To slide into the iterable's items, use :func:`chain` to add filler items + to the left: + + >>> iterable = [1, 2, 3, 4] + >>> n = 3 + >>> padding = [None] * (n - 1) + >>> list(windowed(chain(padding, iterable), 3)) + [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)] + """ + if n < 0: + raise ValueError('n must be >= 0') + if n == 0: + yield tuple() + return + if step < 1: + raise ValueError('step must be >= 1') + + window = deque(maxlen=n) + i = n + for _ in map(window.append, seq): + i -= 1 + if not i: + i = step + yield tuple(window) + + size = len(window) + if size == 0: + return + elif size < n: + yield tuple(chain(window, repeat(fillvalue, n - size))) + elif 0 < i < min(step, n): + window += (fillvalue,) * i + yield 
tuple(window) + + +def substrings(iterable): + """Yield all of the substrings of *iterable*. + + >>> [''.join(s) for s in substrings('more')] + ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more'] + + Note that non-string iterables can also be subdivided. + + >>> list(substrings([0, 1, 2])) + [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)] + + """ + # The length-1 substrings + seq = [] + for item in iter(iterable): + seq.append(item) + yield (item,) + seq = tuple(seq) + item_count = len(seq) + + # And the rest + for n in range(2, item_count + 1): + for i in range(item_count - n + 1): + yield seq[i : i + n] + + +def substrings_indexes(seq, reverse=False): + """Yield all substrings and their positions in *seq* + + The items yielded will be a tuple of the form ``(substr, i, j)``, where + ``substr == seq[i:j]``. + + This function only works for iterables that support slicing, such as + ``str`` objects. + + >>> for item in substrings_indexes('more'): + ... print(item) + ('m', 0, 1) + ('o', 1, 2) + ('r', 2, 3) + ('e', 3, 4) + ('mo', 0, 2) + ('or', 1, 3) + ('re', 2, 4) + ('mor', 0, 3) + ('ore', 1, 4) + ('more', 0, 4) + + Set *reverse* to ``True`` to yield the same items in the opposite order. + + + """ + r = range(1, len(seq) + 1) + if reverse: + r = reversed(r) + return ( + (seq[i : i + L], i, i + L) for L in r for i in range(len(seq) - L + 1) + ) + + +class bucket: + """Wrap *iterable* and return an object that buckets it iterable into + child iterables based on a *key* function. + + >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] + >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character + >>> sorted(list(s)) # Get the keys + ['a', 'b', 'c'] + >>> a_iterable = s['a'] + >>> next(a_iterable) + 'a1' + >>> next(a_iterable) + 'a2' + >>> list(s['b']) + ['b1', 'b2', 'b3'] + + The original iterable will be advanced and its items will be cached until + they are used by the child iterables. This may require significant storage. 
+ + By default, attempting to select a bucket to which no items belong will + exhaust the iterable and cache all values. + If you specify a *validator* function, selected buckets will instead be + checked against it. + + >>> from itertools import count + >>> it = count(1, 2) # Infinite sequence of odd numbers + >>> key = lambda x: x % 10 # Bucket by last digit + >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only + >>> s = bucket(it, key=key, validator=validator) + >>> 2 in s + False + >>> list(s[2]) + [] + + """ + + def __init__(self, iterable, key, validator=None): + self._it = iter(iterable) + self._key = key + self._cache = defaultdict(deque) + self._validator = validator or (lambda x: True) + + def __contains__(self, value): + if not self._validator(value): + return False + + try: + item = next(self[value]) + except StopIteration: + return False + else: + self._cache[value].appendleft(item) + + return True + + def _get_values(self, value): + """ + Helper to yield items from the parent iterator that match *value*. + Items that don't match are stored in the local cache as they + are encountered. + """ + while True: + # If we've cached some items that match the target value, emit + # the first one and evict it from the cache. + if self._cache[value]: + yield self._cache[value].popleft() + # Otherwise we need to advance the parent iterator to search for + # a matching item, caching the rest. 
+ else: + while True: + try: + item = next(self._it) + except StopIteration: + return + item_value = self._key(item) + if item_value == value: + yield item + break + elif self._validator(item_value): + self._cache[item_value].append(item) + + def __iter__(self): + for item in self._it: + item_value = self._key(item) + if self._validator(item_value): + self._cache[item_value].append(item) + + yield from self._cache.keys() + + def __getitem__(self, value): + if not self._validator(value): + return iter(()) + + return self._get_values(value) + + +def spy(iterable, n=1): + """Return a 2-tuple with a list containing the first *n* elements of + *iterable*, and an iterator with the same items as *iterable*. + This allows you to "look ahead" at the items in the iterable without + advancing it. + + There is one item in the list by default: + + >>> iterable = 'abcdefg' + >>> head, iterable = spy(iterable) + >>> head + ['a'] + >>> list(iterable) + ['a', 'b', 'c', 'd', 'e', 'f', 'g'] + + You may use unpacking to retrieve items instead of lists: + + >>> (head,), iterable = spy('abcdefg') + >>> head + 'a' + >>> (first, second), iterable = spy('abcdefg', 2) + >>> first + 'a' + >>> second + 'b' + + The number of items requested can be larger than the number of items in + the iterable: + + >>> iterable = [1, 2, 3, 4, 5] + >>> head, iterable = spy(iterable, 10) + >>> head + [1, 2, 3, 4, 5] + >>> list(iterable) + [1, 2, 3, 4, 5] + + """ + it = iter(iterable) + head = take(n, it) + + return head.copy(), chain(head, it) + + +def interleave(*iterables): + """Return a new iterable yielding from each iterable in turn, + until the shortest is exhausted. + + >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7] + + For a version that doesn't terminate after the shortest iterable is + exhausted, see :func:`interleave_longest`. 
+ + """ + return chain.from_iterable(zip(*iterables)) + + +def interleave_longest(*iterables): + """Return a new iterable yielding from each iterable in turn, + skipping any that are exhausted. + + >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8])) + [1, 4, 6, 2, 5, 7, 3, 8] + + This function produces the same output as :func:`roundrobin`, but may + perform better for some inputs (in particular when the number of iterables + is large). + + """ + i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker)) + return (x for x in i if x is not _marker) + + +def interleave_evenly(iterables, lengths=None): + """ + Interleave multiple iterables so that their elements are evenly distributed + throughout the output sequence. + + >>> iterables = [1, 2, 3, 4, 5], ['a', 'b'] + >>> list(interleave_evenly(iterables)) + [1, 2, 'a', 3, 4, 'b', 5] + + >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]] + >>> list(interleave_evenly(iterables)) + [1, 6, 4, 2, 7, 3, 8, 5] + + This function requires iterables of known length. Iterables without + ``__len__()`` can be used by manually specifying lengths with *lengths*: + + >>> from itertools import combinations, repeat + >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']] + >>> lengths = [4 * (4 - 1) // 2, 3] + >>> list(interleave_evenly(iterables, lengths=lengths)) + [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c'] + + Based on Bresenham's algorithm. + """ + if lengths is None: + try: + lengths = [len(it) for it in iterables] + except TypeError: + raise ValueError( + 'Iterable lengths could not be determined automatically. ' + 'Specify them with the lengths keyword.' 
+ ) + elif len(iterables) != len(lengths): + raise ValueError('Mismatching number of iterables and lengths.') + + dims = len(lengths) + + # sort iterables by length, descending + lengths_permute = sorted( + range(dims), key=lambda i: lengths[i], reverse=True + ) + lengths_desc = [lengths[i] for i in lengths_permute] + iters_desc = [iter(iterables[i]) for i in lengths_permute] + + # the longest iterable is the primary one (Bresenham: the longest + # distance along an axis) + delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:] + iter_primary, iters_secondary = iters_desc[0], iters_desc[1:] + errors = [delta_primary // dims] * len(deltas_secondary) + + to_yield = sum(lengths) + while to_yield: + yield next(iter_primary) + to_yield -= 1 + # update errors for each secondary iterable + errors = [e - delta for e, delta in zip(errors, deltas_secondary)] + + # those iterables for which the error is negative are yielded + # ("diagonal step" in Bresenham) + for i, e in enumerate(errors): + if e < 0: + yield next(iters_secondary[i]) + to_yield -= 1 + errors[i] += delta_primary + + +def collapse(iterable, base_type=None, levels=None): + """Flatten an iterable with multiple levels of nesting (e.g., a list of + lists of tuples) into non-iterable types. + + >>> iterable = [(1, 2), ([3, 4], [[5], [6]])] + >>> list(collapse(iterable)) + [1, 2, 3, 4, 5, 6] + + Binary and text strings are not considered iterable and + will not be collapsed. 
+ + To avoid collapsing other types, specify *base_type*: + + >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']] + >>> list(collapse(iterable, base_type=tuple)) + ['ab', ('cd', 'ef'), 'gh', 'ij'] + + Specify *levels* to stop flattening after a certain level: + + >>> iterable = [('a', ['b']), ('c', ['d'])] + >>> list(collapse(iterable)) # Fully flattened + ['a', 'b', 'c', 'd'] + >>> list(collapse(iterable, levels=1)) # Only one level flattened + ['a', ['b'], 'c', ['d']] + + """ + + def walk(node, level): + if ( + ((levels is not None) and (level > levels)) + or isinstance(node, (str, bytes)) + or ((base_type is not None) and isinstance(node, base_type)) + ): + yield node + return + + try: + tree = iter(node) + except TypeError: + yield node + return + else: + for child in tree: + yield from walk(child, level + 1) + + yield from walk(iterable, 0) + + +def side_effect(func, iterable, chunk_size=None, before=None, after=None): + """Invoke *func* on each item in *iterable* (or on each *chunk_size* group + of items) before yielding the item. + + `func` must be a function that takes a single argument. Its return value + will be discarded. + + *before* and *after* are optional functions that take no arguments. They + will be executed before iteration starts and after it ends, respectively. + + `side_effect` can be used for logging, updating progress bars, or anything + that is not functionally "pure." 
+ + Emitting a status message: + + >>> from more_itertools import consume + >>> func = lambda item: print('Received {}'.format(item)) + >>> consume(side_effect(func, range(2))) + Received 0 + Received 1 + + Operating on chunks of items: + + >>> pair_sums = [] + >>> func = lambda chunk: pair_sums.append(sum(chunk)) + >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2)) + [0, 1, 2, 3, 4, 5] + >>> list(pair_sums) + [1, 5, 9] + + Writing to a file-like object: + + >>> from io import StringIO + >>> from more_itertools import consume + >>> f = StringIO() + >>> func = lambda x: print(x, file=f) + >>> before = lambda: print(u'HEADER', file=f) + >>> after = f.close + >>> it = [u'a', u'b', u'c'] + >>> consume(side_effect(func, it, before=before, after=after)) + >>> f.closed + True + + """ + try: + if before is not None: + before() + + if chunk_size is None: + for item in iterable: + func(item) + yield item + else: + for chunk in chunked(iterable, chunk_size): + func(chunk) + yield from chunk + finally: + if after is not None: + after() + + +def sliced(seq, n, strict=False): + """Yield slices of length *n* from the sequence *seq*. + + >>> list(sliced((1, 2, 3, 4, 5, 6), 3)) + [(1, 2, 3), (4, 5, 6)] + + By the default, the last yielded slice will have fewer than *n* elements + if the length of *seq* is not divisible by *n*: + + >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3)) + [(1, 2, 3), (4, 5, 6), (7, 8)] + + If the length of *seq* is not divisible by *n* and *strict* is + ``True``, then ``ValueError`` will be raised before the last + slice is yielded. + + This function will only work for iterables that support slicing. + For non-sliceable iterables, see :func:`chunked`. 
+ + """ + iterator = takewhile(len, (seq[i : i + n] for i in count(0, n))) + if strict: + + def ret(): + for _slice in iterator: + if len(_slice) != n: + raise ValueError("seq is not divisible by n.") + yield _slice + + return iter(ret()) + else: + return iterator + + +def split_at(iterable, pred, maxsplit=-1, keep_separator=False): + """Yield lists of items from *iterable*, where each list is delimited by + an item where callable *pred* returns ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b')) + [['a'], ['c', 'd', 'c'], ['a']] + + >>> list(split_at(range(10), lambda n: n % 2 == 1)) + [[0], [2], [4], [6], [8], []] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2)) + [[0], [2], [4, 5, 6, 7, 8, 9]] + + By default, the delimiting items are not included in the output. + To include them, set *keep_separator* to ``True``. + + >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True)) + [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item): + yield buf + if keep_separator: + yield [item] + if maxsplit == 1: + yield list(it) + return + buf = [] + maxsplit -= 1 + else: + buf.append(item) + yield buf + + +def split_before(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends just before + an item for which callable *pred* returns ``True``: + + >>> list(split_before('OneTwo', lambda s: s.isupper())) + [['O', 'n', 'e'], ['T', 'w', 'o']] + + >>> list(split_before(range(10), lambda n: n % 3 == 0)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] + + At most *maxsplit* splits are done. 
If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]] + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + if pred(item) and buf: + yield buf + if maxsplit == 1: + yield [item] + list(it) + return + buf = [] + maxsplit -= 1 + buf.append(item) + if buf: + yield buf + + +def split_after(iterable, pred, maxsplit=-1): + """Yield lists of items from *iterable*, where each list ends with an + item where callable *pred* returns ``True``: + + >>> list(split_after('one1two2', lambda s: s.isdigit())) + [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']] + + >>> list(split_after(range(10), lambda n: n % 3 == 0)) + [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]] + + At most *maxsplit* splits are done. If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2)) + [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + buf = [] + it = iter(iterable) + for item in it: + buf.append(item) + if pred(item) and buf: + yield buf + if maxsplit == 1: + buf = list(it) + if buf: + yield buf + return + buf = [] + maxsplit -= 1 + if buf: + yield buf + + +def split_when(iterable, pred, maxsplit=-1): + """Split *iterable* into pieces based on the output of *pred*. + *pred* should be a function that takes successive pairs of items and + returns ``True`` if the iterable should be split in between them. + + For example, to find runs of increasing numbers, split the iterable when + element ``i`` is larger than element ``i + 1``: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y)) + [[1, 2, 3, 3], [2, 5], [2, 4], [2]] + + At most *maxsplit* splits are done. 
If *maxsplit* is not specified or -1, + then there is no limit on the number of splits: + + >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], + ... lambda x, y: x > y, maxsplit=2)) + [[1, 2, 3, 3], [2, 5], [2, 4, 2]] + + """ + if maxsplit == 0: + yield list(iterable) + return + + it = iter(iterable) + try: + cur_item = next(it) + except StopIteration: + return + + buf = [cur_item] + for next_item in it: + if pred(cur_item, next_item): + yield buf + if maxsplit == 1: + yield [next_item] + list(it) + return + buf = [] + maxsplit -= 1 + + buf.append(next_item) + cur_item = next_item + + yield buf + + +def split_into(iterable, sizes): + """Yield a list of sequential items from *iterable* of length 'n' for each + integer 'n' in *sizes*. + + >>> list(split_into([1,2,3,4,5,6], [1,2,3])) + [[1], [2, 3], [4, 5, 6]] + + If the sum of *sizes* is smaller than the length of *iterable*, then the + remaining items of *iterable* will not be returned. + + >>> list(split_into([1,2,3,4,5,6], [2,3])) + [[1, 2], [3, 4, 5]] + + If the sum of *sizes* is larger than the length of *iterable*, fewer items + will be returned in the iteration that overruns *iterable* and further + lists will be empty: + + >>> list(split_into([1,2,3,4], [1,2,3,4])) + [[1], [2, 3], [4], []] + + When a ``None`` object is encountered in *sizes*, the returned list will + contain items up to the end of *iterable* the same way that itertools.slice + does: + + >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None])) + [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]] + + :func:`split_into` can be useful for grouping a series of items where the + sizes of the groups are not uniform. An example would be where in a row + from a table, multiple columns represent elements of the same feature + (e.g. a point represented by x,y,z) but, the format is not the same for + all columns. 
+ """ + # convert the iterable argument into an iterator so its contents can + # be consumed by islice in case it is a generator + it = iter(iterable) + + for size in sizes: + if size is None: + yield list(it) + return + else: + yield list(islice(it, size)) + + +def padded(iterable, fillvalue=None, n=None, next_multiple=False): + """Yield the elements from *iterable*, followed by *fillvalue*, such that + at least *n* items are emitted. + + >>> list(padded([1, 2, 3], '?', 5)) + [1, 2, 3, '?', '?'] + + If *next_multiple* is ``True``, *fillvalue* will be emitted until the + number of items emitted is a multiple of *n*:: + + >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True)) + [1, 2, 3, 4, None, None] + + If *n* is ``None``, *fillvalue* will be emitted indefinitely. + + """ + it = iter(iterable) + if n is None: + yield from chain(it, repeat(fillvalue)) + elif n < 1: + raise ValueError('n must be at least 1') + else: + item_count = 0 + for item in it: + yield item + item_count += 1 + + remaining = (n - item_count) % n if next_multiple else n - item_count + for _ in range(remaining): + yield fillvalue + + +def repeat_each(iterable, n=2): + """Repeat each element in *iterable* *n* times. + + >>> list(repeat_each('ABC', 3)) + ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C'] + """ + return chain.from_iterable(map(repeat, iterable, repeat(n))) + + +def repeat_last(iterable, default=None): + """After the *iterable* is exhausted, keep yielding its last element. + + >>> list(islice(repeat_last(range(3)), 5)) + [0, 1, 2, 2, 2] + + If the iterable is empty, yield *default* forever:: + + >>> list(islice(repeat_last(range(0), 42), 5)) + [42, 42, 42, 42, 42] + + """ + item = _marker + for item in iterable: + yield item + final = default if item is _marker else item + yield from repeat(final) + + +def distribute(n, iterable): + """Distribute the items from *iterable* among *n* smaller iterables. 
+ + >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 3, 5] + >>> list(group_2) + [2, 4, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 4, 7], [2, 5], [3, 6]] + + If the length of *iterable* is smaller than *n*, then the last returned + iterables will be empty: + + >>> children = distribute(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function uses :func:`itertools.tee` and may require significant + storage. If you need the order items in the smaller iterables to match the + original iterable, see :func:`divide`. + + """ + if n < 1: + raise ValueError('n must be at least 1') + + children = tee(iterable, n) + return [islice(it, index, None, n) for index, it in enumerate(children)] + + +def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None): + """Yield tuples whose elements are offset from *iterable*. + The amount by which the `i`-th item in each tuple is offset is given by + the `i`-th item in *offsets*. + + >>> list(stagger([0, 1, 2, 3])) + [(None, 0, 1), (0, 1, 2), (1, 2, 3)] + >>> list(stagger(range(8), offsets=(0, 2, 4))) + [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)] + + By default, the sequence will end when the final element of a tuple is the + last item in the iterable. To continue until the first element of a tuple + is the last item in the iterable, set *longest* to ``True``:: + + >>> list(stagger([0, 1, 2, 3], longest=True)) + [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. 
+ + """ + children = tee(iterable, len(offsets)) + + return zip_offset( + *children, offsets=offsets, longest=longest, fillvalue=fillvalue + ) + + +def zip_equal(*iterables): + """``zip`` the input *iterables* together, but raise + ``UnequalIterablesError`` if they aren't all the same length. + + >>> it_1 = range(3) + >>> it_2 = iter('abc') + >>> list(zip_equal(it_1, it_2)) + [(0, 'a'), (1, 'b'), (2, 'c')] + + >>> it_1 = range(3) + >>> it_2 = iter('abcd') + >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + more_itertools.more.UnequalIterablesError: Iterables have different + lengths + + """ + if hexversion >= 0x30A00A6: + warnings.warn( + ( + 'zip_equal will be removed in a future version of ' + 'more-itertools. Use the builtin zip function with ' + 'strict=True instead.' + ), + DeprecationWarning, + ) + + return _zip_equal(*iterables) + + +def zip_offset(*iterables, offsets, longest=False, fillvalue=None): + """``zip`` the input *iterables* together, but offset the `i`-th iterable + by the `i`-th item in *offsets*. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1))) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')] + + This can be used as a lightweight alternative to SciPy or pandas to analyze + data sets in which some series have a lead or lag relationship. + + By default, the sequence will end when the shortest iterable is exhausted. + To continue until the longest iterable is exhausted, set *longest* to + ``True``. + + >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True)) + [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')] + + By default, ``None`` will be used to replace offsets beyond the end of the + sequence. Specify *fillvalue* to use some other value. 
+ + """ + if len(iterables) != len(offsets): + raise ValueError("Number of iterables and offsets didn't match") + + staggered = [] + for it, n in zip(iterables, offsets): + if n < 0: + staggered.append(chain(repeat(fillvalue, -n), it)) + elif n > 0: + staggered.append(islice(it, n, None)) + else: + staggered.append(it) + + if longest: + return zip_longest(*staggered, fillvalue=fillvalue) + + return zip(*staggered) + + +def sort_together(iterables, key_list=(0,), key=None, reverse=False): + """Return the input iterables sorted together, with *key_list* as the + priority for sorting. All iterables are trimmed to the length of the + shortest one. + + This can be used like the sorting function in a spreadsheet. If each + iterable represents a column of data, the key list determines which + columns are used for sorting. + + By default, all iterables are sorted using the ``0``-th iterable:: + + >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')] + >>> sort_together(iterables) + [(1, 2, 3, 4), ('d', 'c', 'b', 'a')] + + Set a different key list to sort according to another iterable. + Specifying multiple keys dictates how ties are broken:: + + >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')] + >>> sort_together(iterables, key_list=(1, 2)) + [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')] + + To sort by a function of the elements of the iterable, pass a *key* + function. Its arguments are the elements of the iterables corresponding to + the key list:: + + >>> names = ('a', 'b', 'c') + >>> lengths = (1, 2, 3) + >>> widths = (5, 2, 1) + >>> def area(length, width): + ... return length * width + >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area) + [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)] + + Set *reverse* to ``True`` to sort in descending order. 
+ + >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True) + [(3, 2, 1), ('a', 'b', 'c')] + + """ + if key is None: + # if there is no key function, the key argument to sorted is an + # itemgetter + key_argument = itemgetter(*key_list) + else: + # if there is a key function, call it with the items at the offsets + # specified by the key function as arguments + key_list = list(key_list) + if len(key_list) == 1: + # if key_list contains a single item, pass the item at that offset + # as the only argument to the key function + key_offset = key_list[0] + key_argument = lambda zipped_items: key(zipped_items[key_offset]) + else: + # if key_list contains multiple items, use itemgetter to return a + # tuple of items, which we pass as *args to the key function + get_key_items = itemgetter(*key_list) + key_argument = lambda zipped_items: key( + *get_key_items(zipped_items) + ) + + return list( + zip(*sorted(zip(*iterables), key=key_argument, reverse=reverse)) + ) + + +def unzip(iterable): + """The inverse of :func:`zip`, this function disaggregates the elements + of the zipped *iterable*. + + The ``i``-th iterable contains the ``i``-th element from each element + of the zipped iterable. The first element is used to determine the + length of the remaining elements. + + >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> letters, numbers = unzip(iterable) + >>> list(letters) + ['a', 'b', 'c', 'd'] + >>> list(numbers) + [1, 2, 3, 4] + + This is similar to using ``zip(*iterable)``, but it avoids reading + *iterable* into memory. Note, however, that this function uses + :func:`itertools.tee` and thus may require significant storage. + + """ + head, iterable = spy(iter(iterable)) + if not head: + # empty iterable, e.g. 
zip([], [], []) + return () + # spy returns a one-length iterable as head + head = head[0] + iterables = tee(iterable, len(head)) + + def itemgetter(i): + def getter(obj): + try: + return obj[i] + except IndexError: + # basically if we have an iterable like + # iter([(1, 2, 3), (4, 5), (6,)]) + # the second unzipped iterable would fail at the third tuple + # since it would try to access tup[1] + # same with the third unzipped iterable and the second tuple + # to support these "improperly zipped" iterables, + # we create a custom itemgetter + # which just stops the unzipped iterables + # at first length mismatch + raise StopIteration + + return getter + + return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables)) + + +def divide(n, iterable): + """Divide the elements from *iterable* into *n* parts, maintaining + order. + + >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6]) + >>> list(group_1) + [1, 2, 3] + >>> list(group_2) + [4, 5, 6] + + If the length of *iterable* is not evenly divisible by *n*, then the + length of the returned iterables will not be identical: + + >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7]) + >>> [list(c) for c in children] + [[1, 2, 3], [4, 5], [6, 7]] + + If the length of the iterable is smaller than n, then the last returned + iterables will be empty: + + >>> children = divide(5, [1, 2, 3]) + >>> [list(c) for c in children] + [[1], [2], [3], [], []] + + This function will exhaust the iterable before returning and may require + significant storage. If order is not important, see :func:`distribute`, + which does not first pull the iterable into memory. 
+ + """ + if n < 1: + raise ValueError('n must be at least 1') + + try: + iterable[:0] + except TypeError: + seq = tuple(iterable) + else: + seq = iterable + + q, r = divmod(len(seq), n) + + ret = [] + stop = 0 + for i in range(1, n + 1): + start = stop + stop += q + 1 if i <= r else q + ret.append(iter(seq[start:stop])) + + return ret + + +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) + + +def adjacent(predicate, iterable, distance=1): + """Return an iterable over `(bool, item)` tuples where the `item` is + drawn from *iterable* and the `bool` indicates whether + that item satisfies the *predicate* or is adjacent to an item that does. 
+ + For example, to find whether items are adjacent to a ``3``:: + + >>> list(adjacent(lambda x: x == 3, range(6))) + [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)] + + Set *distance* to change what counts as adjacent. For example, to find + whether items are two places away from a ``3``: + + >>> list(adjacent(lambda x: x == 3, range(6), distance=2)) + [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)] + + This is useful for contextualizing the results of a search function. + For example, a code comparison tool might want to identify lines that + have changed, but also surrounding lines to give the viewer of the diff + context. + + The predicate function will only be called once for each item in the + iterable. + + See also :func:`groupby_transform`, which can be used with this function + to group ranges of items with the same `bool` value. + + """ + # Allow distance=0 mainly for testing that it reproduces results with map() + if distance < 0: + raise ValueError('distance must be at least 0') + + i1, i2 = tee(iterable) + padding = [False] * distance + selected = chain(padding, map(predicate, i1), padding) + adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1)) + return zip(adjacent_to_selected, i2) + + +def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None): + """An extension of :func:`itertools.groupby` that can apply transformations + to the grouped data. 
+ + * *keyfunc* is a function computing a key value for each item in *iterable* + * *valuefunc* is a function that transforms the individual items from + *iterable* after grouping + * *reducefunc* is a function that transforms each group of items + + >>> iterable = 'aAAbBBcCC' + >>> keyfunc = lambda k: k.upper() + >>> valuefunc = lambda v: v.lower() + >>> reducefunc = lambda g: ''.join(g) + >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc)) + [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')] + + Each optional argument defaults to an identity function if not specified. + + :func:`groupby_transform` is useful when grouping elements of an iterable + using a separate iterable as the key. To do this, :func:`zip` the iterables + and pass a *keyfunc* that extracts the first element and a *valuefunc* + that extracts the second element:: + + >>> from operator import itemgetter + >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3] + >>> values = 'abcdefghi' + >>> iterable = zip(keys, values) + >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1)) + >>> [(k, ''.join(g)) for k, g in grouper] + [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')] + + Note that the order of items in the iterable is significant. + Only adjacent items are grouped together, so if you don't want any + duplicate groups, you should sort the iterable by the key function. + + """ + ret = groupby(iterable, keyfunc) + if valuefunc: + ret = ((k, map(valuefunc, g)) for k, g in ret) + if reducefunc: + ret = ((k, reducefunc(g)) for k, g in ret) + + return ret + + +class numeric_range(abc.Sequence, abc.Hashable): + """An extension of the built-in ``range()`` function whose arguments can + be any orderable numeric type. + + With only *stop* specified, *start* defaults to ``0`` and *step* + defaults to ``1``. The output items will match the type of *stop*: + + >>> list(numeric_range(3.5)) + [0.0, 1.0, 2.0, 3.0] + + With only *start* and *stop* specified, *step* defaults to ``1``. 
The + output items will match the type of *start*: + + >>> from decimal import Decimal + >>> start = Decimal('2.1') + >>> stop = Decimal('5.1') + >>> list(numeric_range(start, stop)) + [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')] + + With *start*, *stop*, and *step* specified the output items will match + the type of ``start + step``: + + >>> from fractions import Fraction + >>> start = Fraction(1, 2) # Start at 1/2 + >>> stop = Fraction(5, 2) # End at 5/2 + >>> step = Fraction(1, 2) # Count by 1/2 + >>> list(numeric_range(start, stop, step)) + [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)] + + If *step* is zero, ``ValueError`` is raised. Negative steps are supported: + + >>> list(numeric_range(3, -1, -1.0)) + [3.0, 2.0, 1.0, 0.0] + + Be aware of the limitations of floating point numbers; the representation + of the yielded numbers may be surprising. + + ``datetime.datetime`` objects can be used for *start* and *stop*, if *step* + is a ``datetime.timedelta`` object: + + >>> import datetime + >>> start = datetime.datetime(2019, 1, 1) + >>> stop = datetime.datetime(2019, 1, 3) + >>> step = datetime.timedelta(days=1) + >>> items = iter(numeric_range(start, stop, step)) + >>> next(items) + datetime.datetime(2019, 1, 1, 0, 0) + >>> next(items) + datetime.datetime(2019, 1, 2, 0, 0) + + """ + + _EMPTY_HASH = hash(range(0, 0)) + + def __init__(self, *args): + argc = len(args) + if argc == 1: + (self._stop,) = args + self._start = type(self._stop)(0) + self._step = type(self._stop - self._start)(1) + elif argc == 2: + self._start, self._stop = args + self._step = type(self._stop - self._start)(1) + elif argc == 3: + self._start, self._stop, self._step = args + elif argc == 0: + raise TypeError( + 'numeric_range expected at least ' + '1 argument, got {}'.format(argc) + ) + else: + raise TypeError( + 'numeric_range expected at most ' + '3 arguments, got {}'.format(argc) + ) + + self._zero = type(self._step)(0) + if self._step == self._zero: + raise 
ValueError('numeric_range() arg 3 must not be zero') + self._growing = self._step > self._zero + self._init_len() + + def __bool__(self): + if self._growing: + return self._start < self._stop + else: + return self._start > self._stop + + def __contains__(self, elem): + if self._growing: + if self._start <= elem < self._stop: + return (elem - self._start) % self._step == self._zero + else: + if self._start >= elem > self._stop: + return (self._start - elem) % (-self._step) == self._zero + + return False + + def __eq__(self, other): + if isinstance(other, numeric_range): + empty_self = not bool(self) + empty_other = not bool(other) + if empty_self or empty_other: + return empty_self and empty_other # True if both empty + else: + return ( + self._start == other._start + and self._step == other._step + and self._get_by_index(-1) == other._get_by_index(-1) + ) + else: + return False + + def __getitem__(self, key): + if isinstance(key, int): + return self._get_by_index(key) + elif isinstance(key, slice): + step = self._step if key.step is None else key.step * self._step + + if key.start is None or key.start <= -self._len: + start = self._start + elif key.start >= self._len: + start = self._stop + else: # -self._len < key.start < self._len + start = self._get_by_index(key.start) + + if key.stop is None or key.stop >= self._len: + stop = self._stop + elif key.stop <= -self._len: + stop = self._start + else: # -self._len < key.stop < self._len + stop = self._get_by_index(key.stop) + + return numeric_range(start, stop, step) + else: + raise TypeError( + 'numeric range indices must be ' + 'integers or slices, not {}'.format(type(key).__name__) + ) + + def __hash__(self): + if self: + return hash((self._start, self._get_by_index(-1), self._step)) + else: + return self._EMPTY_HASH + + def __iter__(self): + values = (self._start + (n * self._step) for n in count()) + if self._growing: + return takewhile(partial(gt, self._stop), values) + else: + return takewhile(partial(lt, 
self._stop), values) + + def __len__(self): + return self._len + + def _init_len(self): + if self._growing: + start = self._start + stop = self._stop + step = self._step + else: + start = self._stop + stop = self._start + step = -self._step + distance = stop - start + if distance <= self._zero: + self._len = 0 + else: # distance > 0 and step > 0: regular euclidean division + q, r = divmod(distance, step) + self._len = int(q) + int(r != self._zero) + + def __reduce__(self): + return numeric_range, (self._start, self._stop, self._step) + + def __repr__(self): + if self._step == 1: + return "numeric_range({}, {})".format( + repr(self._start), repr(self._stop) + ) + else: + return "numeric_range({}, {}, {})".format( + repr(self._start), repr(self._stop), repr(self._step) + ) + + def __reversed__(self): + return iter( + numeric_range( + self._get_by_index(-1), self._start - self._step, -self._step + ) + ) + + def count(self, value): + return int(value in self) + + def index(self, value): + if self._growing: + if self._start <= value < self._stop: + q, r = divmod(value - self._start, self._step) + if r == self._zero: + return int(q) + else: + if self._start >= value > self._stop: + q, r = divmod(self._start - value, -self._step) + if r == self._zero: + return int(q) + + raise ValueError("{} is not in numeric range".format(value)) + + def _get_by_index(self, i): + if i < 0: + i += self._len + if i < 0 or i >= self._len: + raise IndexError("numeric range object index out of range") + return self._start + i * self._step + + +def count_cycle(iterable, n=None): + """Cycle through the items from *iterable* up to *n* times, yielding + the number of completed cycles along with each item. If *n* is omitted the + process repeats indefinitely. 
+ + >>> list(count_cycle('AB', 3)) + [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')] + + """ + iterable = tuple(iterable) + if not iterable: + return iter(()) + counter = count() if n is None else range(n) + return ((i, item) for i in counter for item in iterable) + + +def mark_ends(iterable): + """Yield 3-tuples of the form ``(is_first, is_last, item)``. + + >>> list(mark_ends('ABC')) + [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')] + + Use this when looping over an iterable to take special action on its first + and/or last items: + + >>> iterable = ['Header', 100, 200, 'Footer'] + >>> total = 0 + >>> for is_first, is_last, item in mark_ends(iterable): + ... if is_first: + ... continue # Skip the header + ... if is_last: + ... continue # Skip the footer + ... total += item + >>> print(total) + 300 + """ + it = iter(iterable) + + try: + b = next(it) + except StopIteration: + return + + try: + for i in count(): + a = b + b = next(it) + yield i == 0, False, a + + except StopIteration: + yield i == 0, True, a + + +def locate(iterable, pred=bool, window_size=None): + """Yield the index of each item in *iterable* for which *pred* returns + ``True``. + + *pred* defaults to :func:`bool`, which will select truthy items: + + >>> list(locate([0, 1, 1, 0, 1, 0, 0])) + [1, 2, 4] + + Set *pred* to a custom function to, e.g., find the indexes for a particular + item. + + >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b')) + [1, 3] + + If *window_size* is given, then the *pred* function will be called with + that many items. 
This enables searching for sub-sequences: + + >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3] + >>> pred = lambda *args: args == (1, 2, 3) + >>> list(locate(iterable, pred=pred, window_size=3)) + [1, 5, 9] + + Use with :func:`seekable` to find indexes and then retrieve the associated + items: + + >>> from itertools import count + >>> from more_itertools import seekable + >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count()) + >>> it = seekable(source) + >>> pred = lambda x: x > 100 + >>> indexes = locate(it, pred=pred) + >>> i = next(indexes) + >>> it.seek(i) + >>> next(it) + 106 + + """ + if window_size is None: + return compress(count(), map(pred, iterable)) + + if window_size < 1: + raise ValueError('window size must be at least 1') + + it = windowed(iterable, window_size, fillvalue=_marker) + return compress(count(), starmap(pred, it)) + + +def longest_common_prefix(iterables): + """Yield elements of the longest common prefix amongst given *iterables*. + + >>> ''.join(longest_common_prefix(['abcd', 'abc', 'abf'])) + 'ab' + + """ + return (c[0] for c in takewhile(all_equal, zip(*iterables))) + + +def lstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the beginning + for which *pred* returns ``True``. + + For example, to remove a set of items from the start of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(lstrip(iterable, pred)) + [1, 2, None, 3, False, None] + + This function is analogous to to :func:`str.lstrip`, and is essentially + an wrapper for :func:`itertools.dropwhile`. + + """ + return dropwhile(pred, iterable) + + +def rstrip(iterable, pred): + """Yield the items from *iterable*, but strip any from the end + for which *pred* returns ``True``. 
+ + For example, to remove a set of items from the end of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(rstrip(iterable, pred)) + [None, False, None, 1, 2, None, 3] + + This function is analogous to :func:`str.rstrip`. + + """ + cache = [] + cache_append = cache.append + cache_clear = cache.clear + for x in iterable: + if pred(x): + cache_append(x) + else: + yield from cache + cache_clear() + yield x + + +def strip(iterable, pred): + """Yield the items from *iterable*, but strip any from the + beginning and end for which *pred* returns ``True``. + + For example, to remove a set of items from both ends of an iterable: + + >>> iterable = (None, False, None, 1, 2, None, 3, False, None) + >>> pred = lambda x: x in {None, False, ''} + >>> list(strip(iterable, pred)) + [1, 2, None, 3] + + This function is analogous to :func:`str.strip`. + + """ + return rstrip(lstrip(iterable, pred), pred) + + +class islice_extended: + """An extension of :func:`itertools.islice` that supports negative values + for *stop*, *start*, and *step*. + + >>> iterable = iter('abcdefgh') + >>> list(islice_extended(iterable, -4, -1)) + ['e', 'f', 'g'] + + Slices with negative values require some caching of *iterable*, but this + function takes care to minimize the amount of memory required. 
+ + For example, you can use a negative step with an infinite iterator: + + >>> from itertools import count + >>> list(islice_extended(count(), 110, 99, -2)) + [110, 108, 106, 104, 102, 100] + + You can also use slice notation directly: + + >>> iterable = map(str, count()) + >>> it = islice_extended(iterable)[10:20:2] + >>> list(it) + ['10', '12', '14', '16', '18'] + + """ + + def __init__(self, iterable, *args): + it = iter(iterable) + if args: + self._iterable = _islice_helper(it, slice(*args)) + else: + self._iterable = it + + def __iter__(self): + return self + + def __next__(self): + return next(self._iterable) + + def __getitem__(self, key): + if isinstance(key, slice): + return islice_extended(_islice_helper(self._iterable, key)) + + raise TypeError('islice_extended.__getitem__ argument must be a slice') + + +def _islice_helper(it, s): + start = s.start + stop = s.stop + if s.step == 0: + raise ValueError('step argument must be a non-zero integer or None.') + step = s.step or 1 + + if step > 0: + start = 0 if (start is None) else start + + if start < 0: + # Consume all but the last -start items + cache = deque(enumerate(it, 1), maxlen=-start) + len_iter = cache[-1][0] if cache else 0 + + # Adjust start to be positive + i = max(len_iter + start, 0) + + # Adjust stop to be positive + if stop is None: + j = len_iter + elif stop >= 0: + j = min(stop, len_iter) + else: + j = max(len_iter + stop, 0) + + # Slice the cache + n = j - i + if n <= 0: + return + + for index, item in islice(cache, 0, n, step): + yield item + elif (stop is not None) and (stop < 0): + # Advance to the start position + next(islice(it, start, start), None) + + # When stop is negative, we have to carry -stop items while + # iterating + cache = deque(islice(it, -stop), maxlen=-stop) + + for index, item in enumerate(it): + cached_item = cache.popleft() + if index % step == 0: + yield cached_item + cache.append(item) + else: + # When both start and stop are positive we have the normal case + 
yield from islice(it, start, stop, step) + else: + start = -1 if (start is None) else start + + if (stop is not None) and (stop < 0): + # Consume all but the last items + n = -stop - 1 + cache = deque(enumerate(it, 1), maxlen=n) + len_iter = cache[-1][0] if cache else 0 + + # If start and stop are both negative they are comparable and + # we can just slice. Otherwise we can adjust start to be negative + # and then slice. + if start < 0: + i, j = start, stop + else: + i, j = min(start - len_iter, -1), None + + for index, item in list(cache)[i:j:step]: + yield item + else: + # Advance to the stop position + if stop is not None: + m = stop + 1 + next(islice(it, m, m), None) + + # stop is positive, so if start is negative they are not comparable + # and we need the rest of the items. + if start < 0: + i = start + n = None + # stop is None and start is positive, so we just need items up to + # the start index. + elif stop is None: + i = None + n = start + 1 + # Both stop and start are positive, so they are comparable. + else: + i = None + n = start - stop + if n <= 0: + return + + cache = list(islice(it, n)) + + yield from cache[i::step] + + +def always_reversible(iterable): + """An extension of :func:`reversed` that supports all iterables, not + just those which implement the ``Reversible`` or ``Sequence`` protocols. + + >>> print(*always_reversible(x for x in range(3))) + 2 1 0 + + If the iterable is already reversible, this function returns the + result of :func:`reversed()`. If the iterable is not reversible, + this function will cache the remaining items in the iterable and + yield them in reverse order, which may require significant storage. + """ + try: + return reversed(iterable) + except TypeError: + return reversed(list(iterable)) + + +def consecutive_groups(iterable, ordering=lambda x: x): + """Yield groups of consecutive items using :func:`itertools.groupby`. + The *ordering* function determines whether two items are adjacent by + returning their position. 
+ + By default, the ordering function is the identity function. This is + suitable for finding runs of numbers: + + >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40] + >>> for group in consecutive_groups(iterable): + ... print(list(group)) + [1] + [10, 11, 12] + [20] + [30, 31, 32, 33] + [40] + + For finding runs of adjacent letters, try using the :meth:`index` method + of a string of letters: + + >>> from string import ascii_lowercase + >>> iterable = 'abcdfgilmnop' + >>> ordering = ascii_lowercase.index + >>> for group in consecutive_groups(iterable, ordering): + ... print(list(group)) + ['a', 'b', 'c', 'd'] + ['f', 'g'] + ['i'] + ['l', 'm', 'n', 'o', 'p'] + + Each group of consecutive items is an iterator that shares it source with + *iterable*. When an an output group is advanced, the previous group is + no longer available unless its elements are copied (e.g., into a ``list``). + + >>> iterable = [1, 2, 11, 12, 21, 22] + >>> saved_groups = [] + >>> for group in consecutive_groups(iterable): + ... saved_groups.append(list(group)) # Copy group elements + >>> saved_groups + [[1, 2], [11, 12], [21, 22]] + + """ + for k, g in groupby( + enumerate(iterable), key=lambda x: x[0] - ordering(x[1]) + ): + yield map(itemgetter(1), g) + + +def difference(iterable, func=sub, *, initial=None): + """This function is the inverse of :func:`itertools.accumulate`. By default + it will compute the first difference of *iterable* using + :func:`operator.sub`: + + >>> from itertools import accumulate + >>> iterable = accumulate([0, 1, 2, 3, 4]) # produces 0, 1, 3, 6, 10 + >>> list(difference(iterable)) + [0, 1, 2, 3, 4] + + *func* defaults to :func:`operator.sub`, but other functions can be + specified. They will be applied as follows:: + + A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ... 
+ + For example, to do progressive division: + + >>> iterable = [1, 2, 6, 24, 120] + >>> func = lambda x, y: x // y + >>> list(difference(iterable, func)) + [1, 2, 3, 4, 5] + + If the *initial* keyword is set, the first element will be skipped when + computing successive differences. + + >>> it = [10, 11, 13, 16] # from accumulate([1, 2, 3], initial=10) + >>> list(difference(it, initial=10)) + [1, 2, 3] + + """ + a, b = tee(iterable) + try: + first = [next(b)] + except StopIteration: + return iter([]) + + if initial is not None: + first = [] + + return chain(first, map(func, b, a)) + + +class SequenceView(Sequence): + """Return a read-only view of the sequence object *target*. + + :class:`SequenceView` objects are analogous to Python's built-in + "dictionary view" types. They provide a dynamic view of a sequence's items, + meaning that when the sequence updates, so does the view. + + >>> seq = ['0', '1', '2'] + >>> view = SequenceView(seq) + >>> view + SequenceView(['0', '1', '2']) + >>> seq.append('3') + >>> view + SequenceView(['0', '1', '2', '3']) + + Sequence views support indexing, slicing, and length queries. They act + like the underlying sequence, except they don't allow assignment: + + >>> view[1] + '1' + >>> view[1:-1] + ['1', '2'] + >>> len(view) + 4 + + Sequence views are useful as an alternative to copying, as they don't + require (much) extra storage. + + """ + + def __init__(self, target): + if not isinstance(target, Sequence): + raise TypeError + self._target = target + + def __getitem__(self, index): + return self._target[index] + + def __len__(self): + return len(self._target) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, repr(self._target)) + + +class seekable: + """Wrap an iterator to allow for seeking backward and forward. This + progressively caches the items in the source iterable so they can be + re-visited. + + Call :meth:`seek` with an index to seek to that position in the source + iterable. 
+ + To "reset" an iterator, seek to ``0``: + + >>> from itertools import count + >>> it = seekable((str(n) for n in count())) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> it.seek(0) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> next(it) + '3' + + You can also seek forward: + + >>> it = seekable((str(n) for n in range(20))) + >>> it.seek(10) + >>> next(it) + '10' + >>> it.seek(20) # Seeking past the end of the source isn't a problem + >>> list(it) + [] + >>> it.seek(0) # Resetting works even after hitting the end + >>> next(it), next(it), next(it) + ('0', '1', '2') + + Call :meth:`peek` to look ahead one item without advancing the iterator: + + >>> it = seekable('1234') + >>> it.peek() + '1' + >>> list(it) + ['1', '2', '3', '4'] + >>> it.peek(default='empty') + 'empty' + + Before the iterator is at its end, calling :func:`bool` on it will return + ``True``. After it will return ``False``: + + >>> it = seekable('5678') + >>> bool(it) + True + >>> list(it) + ['5', '6', '7', '8'] + >>> bool(it) + False + + You may view the contents of the cache with the :meth:`elements` method. + That returns a :class:`SequenceView`, a view that updates automatically: + + >>> it = seekable((str(n) for n in range(10))) + >>> next(it), next(it), next(it) + ('0', '1', '2') + >>> elements = it.elements() + >>> elements + SequenceView(['0', '1', '2']) + >>> next(it) + '3' + >>> elements + SequenceView(['0', '1', '2', '3']) + + By default, the cache grows as the source iterable progresses, so beware of + wrapping very large or infinite iterables. Supply *maxlen* to limit the + size of the cache (this of course limits how far back you can seek). 
+ + >>> from itertools import count + >>> it = seekable((str(n) for n in count()), maxlen=2) + >>> next(it), next(it), next(it), next(it) + ('0', '1', '2', '3') + >>> list(it.elements()) + ['2', '3'] + >>> it.seek(0) + >>> next(it), next(it), next(it), next(it) + ('2', '3', '4', '5') + >>> next(it) + '6' + + """ + + def __init__(self, iterable, maxlen=None): + self._source = iter(iterable) + if maxlen is None: + self._cache = [] + else: + self._cache = deque([], maxlen) + self._index = None + + def __iter__(self): + return self + + def __next__(self): + if self._index is not None: + try: + item = self._cache[self._index] + except IndexError: + self._index = None + else: + self._index += 1 + return item + + item = next(self._source) + self._cache.append(item) + return item + + def __bool__(self): + try: + self.peek() + except StopIteration: + return False + return True + + def peek(self, default=_marker): + try: + peeked = next(self) + except StopIteration: + if default is _marker: + raise + return default + if self._index is None: + self._index = len(self._cache) + self._index -= 1 + return peeked + + def elements(self): + return SequenceView(self._cache) + + def seek(self, index): + self._index = index + remainder = index - len(self._cache) + if remainder > 0: + consume(self, remainder) + + +class run_length: + """ + :func:`run_length.encode` compresses an iterable with run-length encoding. + It yields groups of repeated items with the count of how many times they + were repeated: + + >>> uncompressed = 'abbcccdddd' + >>> list(run_length.encode(uncompressed)) + [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + + :func:`run_length.decode` decompresses an iterable that was previously + compressed with run-length encoding. 
It yields the items of the + decompressed iterable: + + >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)] + >>> list(run_length.decode(compressed)) + ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd'] + + """ + + @staticmethod + def encode(iterable): + return ((k, ilen(g)) for k, g in groupby(iterable)) + + @staticmethod + def decode(iterable): + return chain.from_iterable(repeat(k, n) for k, n in iterable) + + +def exactly_n(iterable, n, predicate=bool): + """Return ``True`` if exactly ``n`` items in the iterable are ``True`` + according to the *predicate* function. + + >>> exactly_n([True, True, False], 2) + True + >>> exactly_n([True, True, False], 1) + False + >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3) + True + + The iterable will be advanced until ``n + 1`` truthy items are encountered, + so avoid calling it on infinite iterables. + + """ + return len(take(n + 1, filter(predicate, iterable))) == n + + +def circular_shifts(iterable): + """Return a list of circular shifts of *iterable*. + + >>> circular_shifts(range(4)) + [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)] + """ + lst = list(iterable) + return take(len(lst), windowed(cycle(lst), len(lst))) + + +def make_decorator(wrapping_func, result_index=0): + """Return a decorator version of *wrapping_func*, which is a function that + modifies an iterable. *result_index* is the position in that function's + signature where the iterable goes. + + This lets you use itertools on the "production end," i.e. at function + definition. This can augment what the function returns without changing the + function's code. + + For example, to produce a decorator version of :func:`chunked`: + + >>> from more_itertools import chunked + >>> chunker = make_decorator(chunked, result_index=0) + >>> @chunker(3) + ... def iter_range(n): + ... return iter(range(n)) + ... 
def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
    """Group the items of *iterable* into a dictionary, optionally
    transforming and summarizing them.

    Each item is filed under the category ``keyfunc(item)``. When *valuefunc*
    is given, ``valuefunc(item)`` is stored instead of the item itself. When
    *reducefunc* is given, each category's list of values is replaced by
    ``reducefunc(values)``.

    With only *keyfunc*, the items are simply grouped:

        >>> result = map_reduce('abbccc', lambda x: x.upper())
        >>> sorted(result.items())
        [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]

    *valuefunc* transforms what is stored:

        >>> result = map_reduce('abbccc', lambda x: x.upper(), lambda x: 1)
        >>> sorted(result.items())
        [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]

    *reducefunc* collapses each category to a single value:

        >>> result = map_reduce('abbccc', lambda x: x.upper(), lambda x: 1, sum)
        >>> sorted(result.items())
        [('A', 1), ('B', 2), ('C', 3)]

    All values are collected into lists before any reduction happens, which
    may require significant storage.

    The returned object is a :obj:`collections.defaultdict` whose
    ``default_factory`` has been set to ``None``, so looking up a missing key
    raises ``KeyError`` just like a plain dictionary.

    """
    grouped = defaultdict(list)
    for element in iterable:
        category = keyfunc(element)
        # A missing valuefunc means "store the item unchanged".
        stored = element if valuefunc is None else valuefunc(element)
        grouped[category].append(stored)

    if reducefunc is not None:
        # Only existing keys are reassigned, so iterating while assigning is
        # safe here.
        for category in grouped:
            grouped[category] = reducefunc(grouped[category])

    # Disable auto-creation of entries so callers get normal dict semantics.
    grouped.default_factory = None
    return grouped
+ + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1] + >>> pred = lambda x: x == 0 + >>> substitutes = (2, 3) + >>> list(replace(iterable, pred, substitutes)) + [1, 1, 2, 3, 1, 1, 2, 3, 1, 1] + + If *count* is given, the number of replacements will be limited: + + >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0] + >>> pred = lambda x: x == 0 + >>> substitutes = [None] + >>> list(replace(iterable, pred, substitutes, count=2)) + [1, 1, None, 1, 1, None, 1, 1, 0] + + Use *window_size* to control the number of items passed as arguments to + *pred*. This allows for locating and replacing subsequences. + + >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5] + >>> window_size = 3 + >>> pred = lambda *args: args == (0, 1, 2) # 3 items passed to pred + >>> substitutes = [3, 4] # Splice in these items + >>> list(replace(iterable, pred, substitutes, window_size=window_size)) + [3, 4, 5, 3, 4, 5] + + """ + if window_size < 1: + raise ValueError('window_size must be at least 1') + + # Save the substitutes iterable, since it's used more than once + substitutes = tuple(substitutes) + + # Add padding such that the number of windows matches the length of the + # iterable + it = chain(iterable, [_marker] * (window_size - 1)) + windows = windowed(it, window_size) + + n = 0 + for w in windows: + # If the current window matches our predicate (and we haven't hit + # our maximum number of replacements), splice in the substitutes + # and then consume the following windows that overlap with this one. + # For example, if the iterable is (0, 1, 2, 3, 4...) + # and the window size is 2, we have (0, 1), (1, 2), (2, 3)... + # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2) + if pred(*w): + if (count is None) or (n < count): + n += 1 + yield from substitutes + consume(windows, window_size - 1) + continue + + # If there was no match (or we've reached the replacement limit), + # yield the first item from the window. 
def set_partitions(iterable, k=None):
    """
    Yield the ways of splitting the items of *iterable* into *k* non-empty
    groups. Unlike :func:`partitions`, the groups need not be contiguous.

        >>> iterable = 'abc'
        >>> for part in set_partitions(iterable, 2):
        ...     print([''.join(p) for p in part])
        ['a', 'bc']
        ['ab', 'c']
        ['b', 'ac']

    When *k* is omitted, the partitions for every group count from 1 up to
    the number of items are produced in turn.

        >>> iterable = 'abc'
        >>> for part in set_partitions(iterable):
        ...     print([''.join(p) for p in part])
        ['abc']
        ['a', 'bc']
        ['ab', 'c']
        ['b', 'ac']
        ['a', 'b', 'c']

    ``ValueError`` is raised (on first iteration) if *k* is less than 1.
    Nothing is yielded if *k* exceeds the number of items.

    """
    items = list(iterable)
    size = len(items)

    if k is not None:
        if k < 1:
            raise ValueError(
                "Can't partition in a negative or zero number of groups"
            )
        if k > size:
            return

    def split_into(pool, groups):
        # One group: everything goes together.
        if groups == 1:
            yield [pool]
            return
        # As many groups as items: every item is alone.
        if len(pool) == groups:
            yield [[member] for member in pool]
            return

        head, *rest = pool
        # Either the first item forms a group of its own ...
        for smaller in split_into(rest, groups - 1):
            yield [[head], *smaller]
        # ... or it joins each group of a partition of the remaining items.
        for partition in split_into(rest, groups):
            for pos, group in enumerate(partition):
                yield partition[:pos] + [[head] + group] + partition[pos + 1:]

    group_counts = range(1, size + 1) if k is None else (k,)
    for count in group_counts:
        yield from split_into(items, count)
def only(iterable, default=None, too_long=None):
    """Return the single item of *iterable*.

    An empty *iterable* gives *default*. If a second item exists, the
    exception given by *too_long* is raised; when *too_long* is not supplied,
    a ``ValueError`` describing the first two items is raised.

        >>> only([], default='missing')
        'missing'
        >>> only([1])
        1
        >>> only([1, 2])  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: Expected exactly one item in iterable, but got 1, 2,
         and perhaps more.
        >>> only([1, 2], too_long=TypeError)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        TypeError

    Note that *iterable* is advanced twice to find out whether a second item
    exists. See :func:`spy` or :func:`peekable` to check iterable contents
    less destructively.
    """
    iterator = iter(iterable)
    result = next(iterator, default)

    # A private sentinel distinguishes "no second item" from any real value.
    nothing = object()
    extra = next(iterator, nothing)
    if extra is nothing:
        return result

    msg = (
        'Expected exactly one item in iterable, but got {!r}, {!r}, '
        'and perhaps more.'.format(result, extra)
    )
    raise too_long or ValueError(msg)
def distinct_combinations(iterable, r):
    """Yield the distinct *r*-length combinations of the items of *iterable*.

        >>> list(distinct_combinations([0, 0, 1], 2))
        [(0, 0), (0, 1)]

    The result matches ``set(combinations(iterable, r))``, but duplicate
    combinations are never generated in the first place, which is much more
    efficient for larger inputs.

    ``ValueError`` is raised (on first iteration) if *r* is negative.

    """
    if r < 0:
        raise ValueError('r must be non-negative')
    if r == 0:
        yield ()
        return

    items = tuple(iterable)
    value_of = itemgetter(1)

    def candidates_from(start):
        # (index, item) pairs from position *start* onwards, keeping only the
        # first occurrence of each item value.
        return unique_everseen(enumerate(items[start:], start), key=value_of)

    # One candidate generator per slot of the combination being built; the
    # slot being filled is always the last generator on the stack.
    stack = [candidates_from(0)]
    combo = [None] * r
    while stack:
        entry = next(stack[-1], None)
        if entry is None:
            # This slot has no more candidates: backtrack.
            stack.pop()
            continue

        position, value = entry
        depth = len(stack) - 1
        combo[depth] = value
        if depth + 1 == r:
            yield tuple(combo)
        else:
            stack.append(candidates_from(position + 1))
def map_if(iterable, pred, func, func_else=lambda x: x):
    """Transform each item of *iterable* with one of two functions, chosen
    by *pred*.

    Items for which ``pred(item)`` is truthy are yielded as ``func(item)``;
    all other items are yielded as ``func_else(item)``. *func_else* defaults
    to the identity function, so non-matching items pass through unchanged.
    All three callables take a single argument.

        >>> from math import sqrt
        >>> iterable = list(range(-5, 5))
        >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig'))
        [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig']
        >>> list(map_if(iterable, lambda x: x >= 0,
        ...             lambda x: f'{sqrt(x):.2f}', lambda x: None))
        [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00']
    """
    for item in iterable:
        if pred(item):
            yield func(item)
        else:
            yield func_else(item)
+ + # Fill up the reservoir (collection of samples) with the first `k` samples + reservoir = take(k, iterable) + + # Generate random number that's the largest in a sample of k U(0,1) numbers + # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic + W = exp(log(random()) / k) + + # The number of elements to skip before changing the reservoir is a random + # number with a geometric distribution. Sample it using random() and logs. + next_index = k + floor(log(random()) / log(1 - W)) + + for index, element in enumerate(iterable, k): + if index == next_index: + reservoir[randrange(k)] = element + # The new W is the largest in a sample of k U(0, `old_W`) numbers + W *= exp(log(random()) / k) + next_index += floor(log(random()) / log(1 - W)) + 1 + + return reservoir + + +def _sample_weighted(iterable, k, weights): + # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. : + # "Weighted random sampling with a reservoir". + + # Log-transform for numerical stability for weights that are small/large + weight_keys = (log(random()) / weight for weight in weights) + + # Fill up the reservoir (collection of samples) with the first `k` + # weight-keys and elements, then heapify the list. + reservoir = take(k, zip(weight_keys, iterable)) + heapify(reservoir) + + # The number of jumps before changing the reservoir is a random variable + # with an exponential distribution. Sample it using random() and logs. + smallest_weight_key, _ = reservoir[0] + weights_to_skip = log(random()) / smallest_weight_key + + for weight, element in zip(weights, iterable): + if weight >= weights_to_skip: + # The notation here is consistent with the paper, but we store + # the weight-keys in log-space for better numerical stability. 
def sample(iterable, k, weights=None):
    """Pick *k* elements from *iterable* at random, without replacement, and
    return them as a list. Like :func:`random.sample`, but *iterable* may be
    of unknown length.

        >>> iterable = range(100)
        >>> sample(iterable, 5)  # doctest: +SKIP
        [81, 60, 96, 16, 4]

    An iterable of *weights*, one per element, may also be supplied:

        >>> iterable = range(100)
        >>> weights = (i * i + 1 for i in range(100))
        >>> sampled = sample(iterable, 5, weights=weights)  # doctest: +SKIP
        [79, 67, 74, 66, 78]

    The weighted form can also generate weighted random permutations, in
    which the relative weight of an item determines how likely it is to
    appear late in the result:

        >>> data = "abcdefgh"
        >>> weights = range(1, len(data) + 1)
        >>> sample(data, k=len(data), weights=weights)  # doctest: +SKIP
        ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f']

    A *k* of ``0`` returns an empty list without touching *iterable*.
    """
    if k == 0:
        return []

    source = iter(iterable)
    if weights is not None:
        return _sample_weighted(source, k, iter(weights))
    return _sample_unweighted(source, k)
class callback_iter:
    """Convert a function that uses callbacks to an iterator.

    Let *func* be a function that takes a `callback` keyword argument.
    For example:

        >>> def func(callback=None):
        ...     for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
        ...         if callback:
        ...             callback(i, c)
        ...     return 4


    Use ``with callback_iter(func)`` to get an iterator over the parameters
    that are delivered to the callback. Each item is an ``(args, kwargs)``
    pair.

        >>> with callback_iter(func) as it:
        ...     for args, kwargs in it:
        ...         print(args)
        (1, 'a')
        (2, 'b')
        (3, 'c')

    The function will be called in a background thread. The ``done`` property
    indicates whether it has completed execution.

        >>> it.done
        True

    If it completes successfully, its return value will be available
    in the ``result`` property.

        >>> it.result
        4

    Notes:

    * If the function uses some keyword argument besides ``callback``, supply
      *callback_kwd*.
    * If it finished executing, but raised an exception, accessing the
      ``result`` property will raise the same exception.
    * If it hasn't finished executing, accessing the ``result``
      property from within the ``with`` block will raise ``RuntimeError``.
    * If it hasn't finished executing, accessing the ``result`` property from
      outside the ``with`` block will raise a
      ``more_itertools.AbortThread`` exception.
    * Provide *wait_seconds* to adjust how frequently it is polled for
      output.

    """

    def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
        self._func = func
        self._callback_kwd = callback_kwd
        self._aborted = False
        self._future = None
        self._wait_seconds = wait_seconds
        # Import lazily so that concurrent.futures is only loaded when a
        # callback_iter is actually created.
        from concurrent.futures import ThreadPoolExecutor

        self._executor = ThreadPoolExecutor(max_workers=1)
        # The function is not submitted until the first item is requested.
        self._iterator = self._reader()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Make any further callback invocation raise AbortThread in the
        # worker, then wait for the worker to finish.
        self._aborted = True
        self._executor.shutdown()

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    @property
    def done(self):
        """``True`` once the wrapped function has finished running."""
        if self._future is None:
            return False
        return self._future.done()

    @property
    def result(self):
        """Return value of the wrapped function.

        Raises ``RuntimeError`` if the function has not completed, and
        re-raises whatever exception the function itself raised.
        """
        if not self.done:
            raise RuntimeError('Function has not yet completed')

        return self._future.result()

    def _reader(self):
        q = Queue()

        def callback(*args, **kwargs):
            if self._aborted:
                raise AbortThread('canceled by user')

            q.put((args, kwargs))

        self._future = self._executor.submit(
            self._func, **{self._callback_kwd: callback}
        )

        # Poll the queue until the function finishes, yielding items as they
        # arrive.
        while True:
            try:
                item = q.get(timeout=self._wait_seconds)
            except Empty:
                pass
            else:
                q.task_done()
                yield item

            if self._future.done():
                break

        # Drain whatever was queued between the last poll and completion.
        remaining = []
        while True:
            try:
                item = q.get_nowait()
            except Empty:
                break
            else:
                q.task_done()
                remaining.append(item)
        q.join()
        yield from remaining
+ >>> iterable = range(7) + >>> n = 3 + >>> for beginning, middle, end in windowed_complete(iterable, n): + ... print(beginning, middle, end) + () (0, 1, 2) (3, 4, 5, 6) + (0,) (1, 2, 3) (4, 5, 6) + (0, 1) (2, 3, 4) (5, 6) + (0, 1, 2) (3, 4, 5) (6,) + (0, 1, 2, 3) (4, 5, 6) () + + Note that *n* must be at least 0 and most equal to the length of + *iterable*. + + This function will exhaust the iterable and may require significant + storage. + """ + if n < 0: + raise ValueError('n must be >= 0') + + seq = tuple(iterable) + size = len(seq) + + if n > size: + raise ValueError('n must be <= len(seq)') + + for i in range(size - n + 1): + beginning = seq[:i] + middle = seq[i : i + n] + end = seq[i + n :] + yield beginning, middle, end + + +def all_unique(iterable, key=None): + """ + Returns ``True`` if all the elements of *iterable* are unique (no two + elements are equal). + + >>> all_unique('ABCB') + False + + If a *key* function is specified, it will be used to make comparisons. + + >>> all_unique('ABCb') + True + >>> all_unique('ABCb', str.lower) + False + + The function returns as soon as the first non-unique element is + encountered. Iterables with a mix of hashable and unhashable items can + be used, but the function will be slower for unhashable items. + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + for element in map(key, iterable) if key else iterable: + try: + if element in seenset: + return False + seenset_add(element) + except TypeError: + if element in seenlist: + return False + seenlist_add(element) + return True + + +def nth_product(index, *args): + """Equivalent to ``list(product(*args))[index]``. + + The products of *args* can be ordered lexicographically. + :func:`nth_product` computes the product at sort position *index* without + computing the previous products. 
def nth_permutation(iterable, r, index):
    """Equivalent to ``list(permutations(iterable, r))[index]``.

    The *r*-length permutations of *iterable* are ordered the way
    :func:`itertools.permutations` emits them. :func:`nth_permutation`
    computes the one at position *index* directly, without generating the
    ones before it.

        >>> nth_permutation('ghijk', 2, 5)
        ('h', 'i')

    An *r* of ``None`` means full-length permutations. A negative *index*
    counts from the end.

    ``ValueError`` is raised if *r* is negative or greater than the length
    of *iterable*.
    ``IndexError`` is raised if *index* is out of range.
    """
    pool = list(iterable)
    n = len(pool)

    if r is None or r == n:
        r = n
        total = factorial(n)
    elif 0 <= r < n:
        total = factorial(n) // factorial(n - r)
    else:
        raise ValueError

    if index < 0:
        index += total

    if not 0 <= index < total:
        raise IndexError

    if total == 0:
        return tuple()

    # Decompose the index in the factorial number system. positions[i] is
    # the position, within the shrinking pool, of the i-th chosen item.
    positions = [0] * r
    quotient = index * factorial(n) // total if r < n else index
    for divisor in range(1, n + 1):
        quotient, remainder = divmod(quotient, divisor)
        slot = n - divisor
        if 0 <= slot < r:
            positions[slot] = remainder
        if quotient == 0:
            break

    return tuple(pool.pop(position) for position in positions)
def product_index(element, *args):
    """Equivalent to ``list(product(*args)).index(element)``.

    The products of *args* are ordered the way :func:`itertools.product`
    emits them. :func:`product_index` computes the first position of
    *element* without generating the products before it.

        >>> product_index([8, 2], range(10), range(5))
        42

    ``ValueError`` is raised if *element* isn't in the product of *args*,
    including when its length differs from the number of pools.
    """
    position = 0

    # Pad the shorter side with the sentinel so a length mismatch between
    # element and args is detected.
    for item, candidates in zip_longest(element, args, fillvalue=_marker):
        if item is _marker or candidates is _marker:
            raise ValueError('element is not a product of args')

        candidates = tuple(candidates)
        # Mixed-radix accumulation: each pool contributes one "digit".
        position = position * len(candidates) + candidates.index(item)

    return position
def permutation_index(element, iterable):
    """Equivalent to ``list(permutations(iterable, r)).index(element)``,
    where *r* is the length of *element*.

    The permutations of *iterable* are ordered the way
    :func:`itertools.permutations` emits them. :func:`permutation_index`
    computes the position of *element* directly, without generating the
    permutations before it.

        >>> permutation_index([1, 3, 2], range(5))
        19

    ``ValueError`` is raised if *element* isn't one of the permutations of
    *iterable*.
    """
    remaining = list(iterable)
    rank = 0
    for item in element:
        # The number of items still available is the radix for this step.
        radix = len(remaining)
        # list.index raises ValueError when the item has been used up or was
        # never in the pool.
        offset = remaining.index(item)
        rank = rank * radix + offset
        remaining.pop(offset)

    return rank
def chunked_even(iterable, n):
    """Break *iterable* into lists of at most *n* items whose lengths differ
    from one another by no more than 1.

        >>> iterable = [1, 2, 3, 4, 5, 6, 7]
        >>> n = 3
        >>> list(chunked_even(iterable, n))  # List lengths: 3, 2, 2
        [[1, 2, 3], [4, 5], [6, 7]]
        >>> list(chunked(iterable, n))  # List lengths: 3, 3, 1
        [[1, 2, 3], [4, 5, 6], [7]]

    """
    # Inputs that expose __len__ are chunked with their length known up
    # front; anything else goes through the buffering variant.
    length_of = getattr(iterable, '__len__', None)
    if length_of is not None:
        return _chunked_even_finite(iterable, length_of(), n)
    return _chunked_even_online(iterable, n)
break + + # Yield num_partial lists of partial_size + for x in iterator: + buffer.append(x) + if len(buffer) == partial_size: + yield buffer + buffer = [] + num_partial -= 1 + + +def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False): + """A version of :func:`zip` that "broadcasts" any scalar + (i.e., non-iterable) items into output tuples. + + >>> iterable_1 = [1, 2, 3] + >>> iterable_2 = ['a', 'b', 'c'] + >>> scalar = '_' + >>> list(zip_broadcast(iterable_1, iterable_2, scalar)) + [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')] + + The *scalar_types* keyword argument determines what types are considered + scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to + treat strings and byte strings as iterable: + + >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None)) + [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')] + + If the *strict* keyword argument is ``True``, then + ``UnequalIterablesError`` will be raised if any of the iterables have + different lengths. + """ + + def is_scalar(obj): + if scalar_types and isinstance(obj, scalar_types): + return True + try: + iter(obj) + except TypeError: + return True + else: + return False + + size = len(objects) + if not size: + return + + iterables, iterable_positions = [], [] + scalars, scalar_positions = [], [] + for i, obj in enumerate(objects): + if is_scalar(obj): + scalars.append(obj) + scalar_positions.append(i) + else: + iterables.append(iter(obj)) + iterable_positions.append(i) + + if len(scalars) == size: + yield tuple(objects) + return + + zipper = _zip_equal if strict else zip + for item in zipper(*iterables): + new_item = [None] * size + + for i, elem in zip(iterable_positions, item): + new_item[i] = elem + + for i, elem in zip(scalar_positions, scalars): + new_item[i] = elem + + yield tuple(new_item) + + +def unique_in_window(iterable, n, key=None): + """Yield the items from *iterable* that haven't been seen recently. + *n* is the size of the lookback window. 
+ + >>> iterable = [0, 1, 0, 2, 3, 0] + >>> n = 3 + >>> list(unique_in_window(iterable, n)) + [0, 1, 2, 3, 0] + + The *key* function, if provided, will be used to determine uniqueness: + + >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower())) + ['a', 'b', 'c', 'd', 'a'] + + The items in *iterable* must be hashable. + + """ + if n <= 0: + raise ValueError('n must be greater than 0') + + window = deque(maxlen=n) + uniques = set() + use_key = key is not None + + for item in iterable: + k = key(item) if use_key else item + if k in uniques: + continue + + if len(uniques) == n: + uniques.discard(window[0]) + + uniques.add(k) + window.append(k) + + yield item + + +def duplicates_everseen(iterable, key=None): + """Yield duplicate elements after their first appearance. + + >>> list(duplicates_everseen('mississippi')) + ['s', 'i', 's', 's', 'i', 'p', 'i'] + >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower)) + ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a'] + + This function is analagous to :func:`unique_everseen` and is subject to + the same performance considerations. + + """ + seen_set = set() + seen_list = [] + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seen_set: + seen_set.add(k) + else: + yield element + except TypeError: + if k not in seen_list: + seen_list.append(k) + else: + yield element + + +def duplicates_justseen(iterable, key=None): + """Yields serially-duplicate elements after their first appearance. + + >>> list(duplicates_justseen('mississippi')) + ['s', 's', 'p'] + >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower)) + ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a'] + + This function is analagous to :func:`unique_justseen`. 
+ + """ + return flatten( + map( + lambda group_tuple: islice_extended(group_tuple[1])[1:], + groupby(iterable, key), + ) + ) + + +def minmax(iterable_or_value, *others, key=None, default=_marker): + """Returns both the smallest and largest items in an iterable + or the largest of two or more arguments. + + >>> minmax([3, 1, 5]) + (1, 5) + + >>> minmax(4, 2, 6) + (2, 6) + + If a *key* function is provided, it will be used to transform the input + items for comparison. + + >>> minmax([5, 30], key=str) # '30' sorts before '5' + (30, 5) + + If a *default* value is provided, it will be returned if there are no + input items. + + >>> minmax([], default=(0, 0)) + (0, 0) + + Otherwise ``ValueError`` is raised. + + This function is based on the + `recipe <http://code.activestate.com/recipes/577916/>`__ by + Raymond Hettinger and takes care to minimize the number of comparisons + performed. + """ + iterable = (iterable_or_value, *others) if others else iterable_or_value + + it = iter(iterable) + + try: + lo = hi = next(it) + except StopIteration as e: + if default is _marker: + raise ValueError( + '`minmax()` argument is an empty iterable. ' + 'Provide a `default` value to suppress this error.' + ) from e + return default + + # Different branches depending on the presence of key. This saves a lot + # of unimportant copies which would slow the "key=None" branch + # significantly down. 
+ if key is None: + for x, y in zip_longest(it, it, fillvalue=lo): + if y < x: + x, y = y, x + if x < lo: + lo = x + if hi < y: + hi = y + + else: + lo_key = hi_key = key(lo) + + for x, y in zip_longest(it, it, fillvalue=lo): + x_key, y_key = key(x), key(y) + + if y_key < x_key: + x, y, x_key, y_key = y, x, y_key, x_key + if x_key < lo_key: + lo, lo_key = x, x_key + if hi_key < y_key: + hi, hi_key = y, y_key + + return lo, hi + + +def constrained_batches( + iterable, max_size, max_count=None, get_len=len, strict=True +): + """Yield batches of items from *iterable* with a combined size limited by + *max_size*. + + >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1'] + >>> list(constrained_batches(iterable, 10)) + [(b'12345', b'123'), (b'12345678', b'1', b'1'), (b'12', b'1')] + + If a *max_count* is supplied, the number of items per batch is also + limited: + + >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1'] + >>> list(constrained_batches(iterable, 10, max_count = 2)) + [(b'12345', b'123'), (b'12345678', b'1'), (b'1', b'12'), (b'1',)] + + If a *get_len* function is supplied, use that instead of :func:`len` to + determine item size. + + If *strict* is ``True``, raise ``ValueError`` if any single item is bigger + than *max_size*. Otherwise, allow single items to exceed *max_size*. 
+ """ + if max_size <= 0: + raise ValueError('maximum size must be greater than zero') + + batch = [] + batch_size = 0 + batch_count = 0 + for item in iterable: + item_len = get_len(item) + if strict and item_len > max_size: + raise ValueError('item size exceeds maximum size') + + reached_count = batch_count == max_count + reached_size = item_len + batch_size > max_size + if batch_count and (reached_size or reached_count): + yield tuple(batch) + batch.clear() + batch_size = 0 + batch_count = 0 + + batch.append(item) + batch_size += item_len + batch_count += 1 + + if batch: + yield tuple(batch) + + +def gray_product(*iterables): + """Like :func:`itertools.product`, but return tuples in an order such + that only one element in the generated tuple changes from one iteration + to the next. + + >>> list(gray_product('AB','CD')) + [('A', 'C'), ('B', 'C'), ('B', 'D'), ('A', 'D')] + + This function consumes all of the input iterables before producing output. + If any of the input iterables have fewer than two items, ``ValueError`` + is raised. + + For information on the algorithm, see + `this section <https://www-cs-faculty.stanford.edu/~knuth/fasc2a.ps.gz>`__ + of Donald Knuth's *The Art of Computer Programming*. + """ + all_iterables = tuple(tuple(x) for x in iterables) + iterable_count = len(all_iterables) + for iterable in all_iterables: + if len(iterable) < 2: + raise ValueError("each iterable must have two or more items") + + # This is based on "Algorithm H" from section 7.2.1.1, page 20. 
+ # a holds the indexes of the source iterables for the n-tuple to be yielded + # f is the array of "focus pointers" + # o is the array of "directions" + a = [0] * iterable_count + f = list(range(iterable_count + 1)) + o = [1] * iterable_count + while True: + yield tuple(all_iterables[i][a[i]] for i in range(iterable_count)) + j = f[0] + f[0] = 0 + if j == iterable_count: + break + a[j] = a[j] + o[j] + if a[j] == 0 or a[j] == len(all_iterables[j]) - 1: + o[j] = -o[j] + f[j] = f[j + 1] + f[j + 1] = j + 1 diff --git a/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/recipes.py b/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/recipes.py new file mode 100644 index 0000000000..3facc2e3a6 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/more_itertools/recipes.py @@ -0,0 +1,930 @@ +"""Imported from the recipes section of the itertools documentation. + +All functions taken from the recipes section of the itertools library docs +[1]_. +Some backward-compatible usability improvements have been made. + +.. 
[1] http://docs.python.org/library/itertools.html#recipes + +""" +import math +import operator +import warnings + +from collections import deque +from collections.abc import Sized +from functools import reduce +from itertools import ( + chain, + combinations, + compress, + count, + cycle, + groupby, + islice, + product, + repeat, + starmap, + tee, + zip_longest, +) +from random import randrange, sample, choice +from sys import hexversion + +__all__ = [ + 'all_equal', + 'batched', + 'before_and_after', + 'consume', + 'convolve', + 'dotproduct', + 'first_true', + 'factor', + 'flatten', + 'grouper', + 'iter_except', + 'iter_index', + 'matmul', + 'ncycles', + 'nth', + 'nth_combination', + 'padnone', + 'pad_none', + 'pairwise', + 'partition', + 'polynomial_from_roots', + 'powerset', + 'prepend', + 'quantify', + 'random_combination_with_replacement', + 'random_combination', + 'random_permutation', + 'random_product', + 'repeatfunc', + 'roundrobin', + 'sieve', + 'sliding_window', + 'subslices', + 'tabulate', + 'tail', + 'take', + 'transpose', + 'triplewise', + 'unique_everseen', + 'unique_justseen', +] + +_marker = object() + + +def take(n, iterable): + """Return first *n* items of the iterable as a list. + + >>> take(3, range(10)) + [0, 1, 2] + + If there are fewer than *n* items in the iterable, all of them are + returned. + + >>> take(10, range(3)) + [0, 1, 2] + + """ + return list(islice(iterable, n)) + + +def tabulate(function, start=0): + """Return an iterator over the results of ``func(start)``, + ``func(start + 1)``, ``func(start + 2)``... + + *func* should be a function that accepts one integer argument. + + If *start* is not specified it defaults to 0. It will be incremented each + time the iterator is advanced. + + >>> square = lambda x: x ** 2 + >>> iterator = tabulate(square, -3) + >>> take(4, iterator) + [9, 4, 1, 0] + + """ + return map(function, count(start)) + + +def tail(n, iterable): + """Return an iterator over the last *n* items of *iterable*. 
+ + >>> t = tail(3, 'ABCDEFG') + >>> list(t) + ['E', 'F', 'G'] + + """ + # If the given iterable has a length, then we can use islice to get its + # final elements. Note that if the iterable is not actually Iterable, + # either islice or deque will throw a TypeError. This is why we don't + # check if it is Iterable. + if isinstance(iterable, Sized): + yield from islice(iterable, max(0, len(iterable) - n), None) + else: + yield from iter(deque(iterable, maxlen=n)) + + +def consume(iterator, n=None): + """Advance *iterable* by *n* steps. If *n* is ``None``, consume it + entirely. + + Efficiently exhausts an iterator without returning values. Defaults to + consuming the whole iterator, but an optional second argument may be + provided to limit consumption. + + >>> i = (x for x in range(10)) + >>> next(i) + 0 + >>> consume(i, 3) + >>> next(i) + 4 + >>> consume(i) + >>> next(i) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + StopIteration + + If the iterator has fewer items remaining than the provided limit, the + whole iterator will be consumed. + + >>> i = (x for x in range(3)) + >>> consume(i, 5) + >>> next(i) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + StopIteration + + """ + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +def nth(iterable, n, default=None): + """Returns the nth item or a default value. + + >>> l = range(10) + >>> nth(l, 3) + 3 + >>> nth(l, 20, "zebra") + 'zebra' + + """ + return next(islice(iterable, n, None), default) + + +def all_equal(iterable): + """ + Returns ``True`` if all the elements are equal to each other. 
+ + >>> all_equal('aaaa') + True + >>> all_equal('aaab') + False + + """ + g = groupby(iterable) + return next(g, True) and not next(g, False) + + +def quantify(iterable, pred=bool): + """Return the how many times the predicate is true. + + >>> quantify([True, False, True]) + 2 + + """ + return sum(map(pred, iterable)) + + +def pad_none(iterable): + """Returns the sequence of elements and then returns ``None`` indefinitely. + + >>> take(5, pad_none(range(3))) + [0, 1, 2, None, None] + + Useful for emulating the behavior of the built-in :func:`map` function. + + See also :func:`padded`. + + """ + return chain(iterable, repeat(None)) + + +padnone = pad_none + + +def ncycles(iterable, n): + """Returns the sequence elements *n* times + + >>> list(ncycles(["a", "b"], 3)) + ['a', 'b', 'a', 'b', 'a', 'b'] + + """ + return chain.from_iterable(repeat(tuple(iterable), n)) + + +def dotproduct(vec1, vec2): + """Returns the dot product of the two iterables. + + >>> dotproduct([10, 10], [20, 20]) + 400 + + """ + return sum(map(operator.mul, vec1, vec2)) + + +def flatten(listOfLists): + """Return an iterator flattening one level of nesting in a list of lists. + + >>> list(flatten([[0, 1], [2, 3]])) + [0, 1, 2, 3] + + See also :func:`collapse`, which can flatten multiple levels of nesting. + + """ + return chain.from_iterable(listOfLists) + + +def repeatfunc(func, times=None, *args): + """Call *func* with *args* repeatedly, returning an iterable over the + results. 
+ + If *times* is specified, the iterable will terminate after that many + repetitions: + + >>> from operator import add + >>> times = 4 + >>> args = 3, 5 + >>> list(repeatfunc(add, times, *args)) + [8, 8, 8, 8] + + If *times* is ``None`` the iterable will not terminate: + + >>> from random import randrange + >>> times = None + >>> args = 1, 11 + >>> take(6, repeatfunc(randrange, times, *args)) # doctest:+SKIP + [2, 4, 8, 1, 8, 4] + + """ + if times is None: + return starmap(func, repeat(args)) + return starmap(func, repeat(args, times)) + + +def _pairwise(iterable): + """Returns an iterator of paired items, overlapping, from the original + + >>> take(4, pairwise(count())) + [(0, 1), (1, 2), (2, 3), (3, 4)] + + On Python 3.10 and above, this is an alias for :func:`itertools.pairwise`. + + """ + a, b = tee(iterable) + next(b, None) + yield from zip(a, b) + + +try: + from itertools import pairwise as itertools_pairwise +except ImportError: + pairwise = _pairwise +else: + + def pairwise(iterable): + yield from itertools_pairwise(iterable) + + pairwise.__doc__ = _pairwise.__doc__ + + +class UnequalIterablesError(ValueError): + def __init__(self, details=None): + msg = 'Iterables have different lengths' + if details is not None: + msg += (': index 0 has length {}; index {} has length {}').format( + *details + ) + + super().__init__(msg) + + +def _zip_equal_generator(iterables): + for combo in zip_longest(*iterables, fillvalue=_marker): + for val in combo: + if val is _marker: + raise UnequalIterablesError() + yield combo + + +def _zip_equal(*iterables): + # Check whether the iterables are all the same size. + try: + first_size = len(iterables[0]) + for i, it in enumerate(iterables[1:], 1): + size = len(it) + if size != first_size: + break + else: + # If we didn't break out, we can use the built-in zip. + return zip(*iterables) + + # If we did break out, there was a mismatch. 
+ raise UnequalIterablesError(details=(first_size, i, size)) + # If any one of the iterables didn't have a length, start reading + # them until one runs out. + except TypeError: + return _zip_equal_generator(iterables) + + +def grouper(iterable, n, incomplete='fill', fillvalue=None): + """Group elements from *iterable* into fixed-length groups of length *n*. + + >>> list(grouper('ABCDEF', 3)) + [('A', 'B', 'C'), ('D', 'E', 'F')] + + The keyword arguments *incomplete* and *fillvalue* control what happens for + iterables whose length is not a multiple of *n*. + + When *incomplete* is `'fill'`, the last group will contain instances of + *fillvalue*. + + >>> list(grouper('ABCDEFG', 3, incomplete='fill', fillvalue='x')) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')] + + When *incomplete* is `'ignore'`, the last group will not be emitted. + + >>> list(grouper('ABCDEFG', 3, incomplete='ignore', fillvalue='x')) + [('A', 'B', 'C'), ('D', 'E', 'F')] + + When *incomplete* is `'strict'`, a subclass of `ValueError` will be raised. + + >>> it = grouper('ABCDEFG', 3, incomplete='strict') + >>> list(it) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + UnequalIterablesError + + """ + args = [iter(iterable)] * n + if incomplete == 'fill': + return zip_longest(*args, fillvalue=fillvalue) + if incomplete == 'strict': + return _zip_equal(*args) + if incomplete == 'ignore': + return zip(*args) + else: + raise ValueError('Expected fill, strict, or ignore') + + +def roundrobin(*iterables): + """Yields an item from each iterable, alternating between them. + + >>> list(roundrobin('ABC', 'D', 'EF')) + ['A', 'D', 'E', 'B', 'F', 'C'] + + This function produces the same output as :func:`interleave_longest`, but + may perform better for some inputs (in particular when the number of + iterables is small). 
+ + """ + # Recipe credited to George Sakkis + pending = len(iterables) + nexts = cycle(iter(it).__next__ for it in iterables) + while pending: + try: + for next in nexts: + yield next() + except StopIteration: + pending -= 1 + nexts = cycle(islice(nexts, pending)) + + +def partition(pred, iterable): + """ + Returns a 2-tuple of iterables derived from the input iterable. + The first yields the items that have ``pred(item) == False``. + The second yields the items that have ``pred(item) == True``. + + >>> is_odd = lambda x: x % 2 != 0 + >>> iterable = range(10) + >>> even_items, odd_items = partition(is_odd, iterable) + >>> list(even_items), list(odd_items) + ([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]) + + If *pred* is None, :func:`bool` is used. + + >>> iterable = [0, 1, False, True, '', ' '] + >>> false_items, true_items = partition(None, iterable) + >>> list(false_items), list(true_items) + ([0, False, ''], [1, True, ' ']) + + """ + if pred is None: + pred = bool + + evaluations = ((pred(x), x) for x in iterable) + t1, t2 = tee(evaluations) + return ( + (x for (cond, x) in t1 if not cond), + (x for (cond, x) in t2 if cond), + ) + + +def powerset(iterable): + """Yields all possible subsets of the iterable. + + >>> list(powerset([1, 2, 3])) + [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)] + + :func:`powerset` will operate on iterables that aren't :class:`set` + instances, so repeated elements in the input will produce repeated elements + in the output. Use :func:`unique_everseen` on the input to avoid generating + duplicates: + + >>> seq = [1, 1, 0] + >>> list(powerset(seq)) + [(), (1,), (1,), (0,), (1, 1), (1, 0), (1, 0), (1, 1, 0)] + >>> from more_itertools import unique_everseen + >>> list(powerset(unique_everseen(seq))) + [(), (1,), (0,), (1, 0)] + + """ + s = list(iterable) + return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) + + +def unique_everseen(iterable, key=None): + """ + Yield unique elements, preserving order. 
+ + >>> list(unique_everseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D'] + >>> list(unique_everseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'D'] + + Sequences with a mix of hashable and unhashable items can be used. + The function will be slower (i.e., `O(n^2)`) for unhashable items. + + Remember that ``list`` objects are unhashable - you can use the *key* + parameter to transform the list to a tuple (which is hashable) to + avoid a slowdown. + + >>> iterable = ([1, 2], [2, 3], [1, 2]) + >>> list(unique_everseen(iterable)) # Slow + [[1, 2], [2, 3]] + >>> list(unique_everseen(iterable, key=tuple)) # Faster + [[1, 2], [2, 3]] + + Similary, you may want to convert unhashable ``set`` objects with + ``key=frozenset``. For ``dict`` objects, + ``key=lambda x: frozenset(x.items())`` can be used. + + """ + seenset = set() + seenset_add = seenset.add + seenlist = [] + seenlist_add = seenlist.append + use_key = key is not None + + for element in iterable: + k = key(element) if use_key else element + try: + if k not in seenset: + seenset_add(k) + yield element + except TypeError: + if k not in seenlist: + seenlist_add(k) + yield element + + +def unique_justseen(iterable, key=None): + """Yields elements in order, ignoring serial duplicates + + >>> list(unique_justseen('AAAABBBCCDAABBB')) + ['A', 'B', 'C', 'D', 'A', 'B'] + >>> list(unique_justseen('ABBCcAD', str.lower)) + ['A', 'B', 'C', 'A', 'D'] + + """ + return map(next, map(operator.itemgetter(1), groupby(iterable, key))) + + +def iter_except(func, exception, first=None): + """Yields results from a function repeatedly until an exception is raised. + + Converts a call-until-exception interface to an iterator interface. + Like ``iter(func, sentinel)``, but uses an exception instead of a sentinel + to end the loop. 
+ + >>> l = [0, 1, 2] + >>> list(iter_except(l.pop, IndexError)) + [2, 1, 0] + + Multiple exceptions can be specified as a stopping condition: + + >>> l = [1, 2, 3, '...', 4, 5, 6] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [7, 6, 5] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [4, 3, 2] + >>> list(iter_except(lambda: 1 + l.pop(), (IndexError, TypeError))) + [] + + """ + try: + if first is not None: + yield first() + while 1: + yield func() + except exception: + pass + + +def first_true(iterable, default=None, pred=None): + """ + Returns the first true value in the iterable. + + If no true value is found, returns *default* + + If *pred* is not None, returns the first item for which + ``pred(item) == True`` . + + >>> first_true(range(10)) + 1 + >>> first_true(range(10), pred=lambda x: x > 5) + 6 + >>> first_true(range(10), default='missing', pred=lambda x: x > 9) + 'missing' + + """ + return next(filter(pred, iterable), default) + + +def random_product(*args, repeat=1): + """Draw an item at random from each of the input iterables. + + >>> random_product('abc', range(4), 'XYZ') # doctest:+SKIP + ('c', 3, 'Z') + + If *repeat* is provided as a keyword argument, that many items will be + drawn from each iterable. + + >>> random_product('abcd', range(4), repeat=2) # doctest:+SKIP + ('a', 2, 'd', 3) + + This equivalent to taking a random selection from + ``itertools.product(*args, **kwarg)``. + + """ + pools = [tuple(pool) for pool in args] * repeat + return tuple(choice(pool) for pool in pools) + + +def random_permutation(iterable, r=None): + """Return a random *r* length permutation of the elements in *iterable*. + + If *r* is not specified or is ``None``, then *r* defaults to the length of + *iterable*. + + >>> random_permutation(range(5)) # doctest:+SKIP + (3, 4, 0, 1, 2) + + This equivalent to taking a random selection from + ``itertools.permutations(iterable, r)``. 
+ + """ + pool = tuple(iterable) + r = len(pool) if r is None else r + return tuple(sample(pool, r)) + + +def random_combination(iterable, r): + """Return a random *r* length subsequence of the elements in *iterable*. + + >>> random_combination(range(5), 3) # doctest:+SKIP + (2, 3, 4) + + This equivalent to taking a random selection from + ``itertools.combinations(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(sample(range(n), r)) + return tuple(pool[i] for i in indices) + + +def random_combination_with_replacement(iterable, r): + """Return a random *r* length subsequence of elements in *iterable*, + allowing individual elements to be repeated. + + >>> random_combination_with_replacement(range(3), 5) # doctest:+SKIP + (0, 0, 1, 2, 2) + + This equivalent to taking a random selection from + ``itertools.combinations_with_replacement(iterable, r)``. + + """ + pool = tuple(iterable) + n = len(pool) + indices = sorted(randrange(n) for i in range(r)) + return tuple(pool[i] for i in indices) + + +def nth_combination(iterable, r, index): + """Equivalent to ``list(combinations(iterable, r))[index]``. + + The subsequences of *iterable* that are of length *r* can be ordered + lexicographically. :func:`nth_combination` computes the subsequence at + sort position *index* directly, without computing the previous + subsequences. + + >>> nth_combination(range(5), 3, 5) + (0, 3, 4) + + ``ValueError`` will be raised If *r* is negative or greater than the length + of *iterable*. + ``IndexError`` will be raised if the given *index* is invalid. 
+ """ + pool = tuple(iterable) + n = len(pool) + if (r < 0) or (r > n): + raise ValueError + + c = 1 + k = min(r, n - r) + for i in range(1, k + 1): + c = c * (n - k + i) // i + + if index < 0: + index += c + + if (index < 0) or (index >= c): + raise IndexError + + result = [] + while r: + c, n, r = c * r // n, n - 1, r - 1 + while index >= c: + index -= c + c, n = c * (n - r) // n, n - 1 + result.append(pool[-1 - n]) + + return tuple(result) + + +def prepend(value, iterator): + """Yield *value*, followed by the elements in *iterator*. + + >>> value = '0' + >>> iterator = ['1', '2', '3'] + >>> list(prepend(value, iterator)) + ['0', '1', '2', '3'] + + To prepend multiple values, see :func:`itertools.chain` + or :func:`value_chain`. + + """ + return chain([value], iterator) + + +def convolve(signal, kernel): + """Convolve the iterable *signal* with the iterable *kernel*. + + >>> signal = (1, 2, 3, 4, 5) + >>> kernel = [3, 2, 1] + >>> list(convolve(signal, kernel)) + [3, 8, 14, 20, 26, 14, 5] + + Note: the input arguments are not interchangeable, as the *kernel* + is immediately consumed and stored. + + """ + kernel = tuple(kernel)[::-1] + n = len(kernel) + window = deque([0], maxlen=n) * n + for x in chain(signal, repeat(0, n - 1)): + window.append(x) + yield sum(map(operator.mul, kernel, window)) + + +def before_and_after(predicate, it): + """A variant of :func:`takewhile` that allows complete access to the + remainder of the iterator. + + >>> it = iter('ABCdEfGhI') + >>> all_upper, remainder = before_and_after(str.isupper, it) + >>> ''.join(all_upper) + 'ABC' + >>> ''.join(remainder) # takewhile() would lose the 'd' + 'dEfGhI' + + Note that the first iterator must be fully consumed before the second + iterator can generate valid results. 
+ """ + it = iter(it) + transition = [] + + def true_iterator(): + for elem in it: + if predicate(elem): + yield elem + else: + transition.append(elem) + return + + # Note: this is different from itertools recipes to allow nesting + # before_and_after remainders into before_and_after again. See tests + # for an example. + remainder_iterator = chain(transition, it) + + return true_iterator(), remainder_iterator + + +def triplewise(iterable): + """Return overlapping triplets from *iterable*. + + >>> list(triplewise('ABCDE')) + [('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E')] + + """ + for (a, _), (b, c) in pairwise(pairwise(iterable)): + yield a, b, c + + +def sliding_window(iterable, n): + """Return a sliding window of width *n* over *iterable*. + + >>> list(sliding_window(range(6), 4)) + [(0, 1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5)] + + If *iterable* has fewer than *n* items, then nothing is yielded: + + >>> list(sliding_window(range(3), 4)) + [] + + For a variant with more features, see :func:`windowed`. + """ + it = iter(iterable) + window = deque(islice(it, n), maxlen=n) + if len(window) == n: + yield tuple(window) + for x in it: + window.append(x) + yield tuple(window) + + +def subslices(iterable): + """Return all contiguous non-empty subslices of *iterable*. + + >>> list(subslices('ABC')) + [['A'], ['A', 'B'], ['A', 'B', 'C'], ['B'], ['B', 'C'], ['C']] + + This is similar to :func:`substrings`, but emits items in a different + order. + """ + seq = list(iterable) + slices = starmap(slice, combinations(range(len(seq) + 1), 2)) + return map(operator.getitem, repeat(seq), slices) + + +def polynomial_from_roots(roots): + """Compute a polynomial's coefficients from its roots. 
+ + >>> roots = [5, -4, 3] # (x - 5) * (x + 4) * (x - 3) + >>> polynomial_from_roots(roots) # x^3 - 4 * x^2 - 17 * x + 60 + [1, -4, -17, 60] + """ + # Use math.prod for Python 3.8+, + prod = getattr(math, 'prod', lambda x: reduce(operator.mul, x, 1)) + roots = list(map(operator.neg, roots)) + return [ + sum(map(prod, combinations(roots, k))) for k in range(len(roots) + 1) + ] + + +def iter_index(iterable, value, start=0): + """Yield the index of each place in *iterable* that *value* occurs, + beginning with index *start*. + + See :func:`locate` for a more general means of finding the indexes + associated with particular values. + + >>> list(iter_index('AABCADEAF', 'A')) + [0, 1, 4, 7] + """ + try: + seq_index = iterable.index + except AttributeError: + # Slow path for general iterables + it = islice(iterable, start, None) + for i, element in enumerate(it, start): + if element is value or element == value: + yield i + else: + # Fast path for sequences + i = start - 1 + try: + while True: + i = seq_index(value, i + 1) + yield i + except ValueError: + pass + + +def sieve(n): + """Yield the primes less than n. + + >>> list(sieve(30)) + [2, 3, 5, 7, 11, 13, 17, 19, 23, 29] + """ + isqrt = getattr(math, 'isqrt', lambda x: int(math.sqrt(x))) + data = bytearray((0, 1)) * (n // 2) + data[:3] = 0, 0, 0 + limit = isqrt(n) + 1 + for p in compress(range(limit), data): + data[p * p : n : p + p] = bytes(len(range(p * p, n, p + p))) + data[2] = 1 + return iter_index(data, 1) if n > 2 else iter([]) + + +def batched(iterable, n): + """Batch data into lists of length *n*. The last batch may be shorter. + + >>> list(batched('ABCDEFG', 3)) + [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']] + + This recipe is from the ``itertools`` docs. This library also provides + :func:`chunked`, which has a different implementation. + """ + if hexversion >= 0x30C00A0: # Python 3.12.0a0 + warnings.warn( + ( + 'batched will be removed in a future version of ' + 'more-itertools. 
Use the standard library ' + 'itertools.batched function instead' + ), + DeprecationWarning, + ) + + it = iter(iterable) + while True: + batch = list(islice(it, n)) + if not batch: + break + yield batch + + +def transpose(it): + """Swap the rows and columns of the input. + + >>> list(transpose([(1, 2, 3), (11, 22, 33)])) + [(1, 11), (2, 22), (3, 33)] + + The caller should ensure that the dimensions of the input are compatible. + """ + # TODO: when 3.9 goes end-of-life, add stric=True to this. + return zip(*it) + + +def matmul(m1, m2): + """Multiply two matrices. + >>> list(matmul([(7, 5), (3, 5)], [(2, 5), (7, 9)])) + [[49, 80], [41, 60]] + + The caller should ensure that the dimensions of the input matrices are + compatible with each other. + """ + n = len(m2[0]) + return batched(starmap(dotproduct, product(m1, transpose(m2))), n) + + +def factor(n): + """Yield the prime factors of n. + >>> list(factor(360)) + [2, 2, 2, 3, 3, 5] + """ + isqrt = getattr(math, 'isqrt', lambda x: int(math.sqrt(x))) + for prime in sieve(isqrt(n) + 1): + while True: + quotient, remainder = divmod(n, prime) + if remainder: + break + yield prime + n = quotient + if n == 1: + return + if n >= 2: + yield n diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/__about__.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/__about__.py deleted file mode 100644 index 4d998578d7..0000000000 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/__about__.py +++ /dev/null @@ -1,27 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. 
-from __future__ import absolute_import, division, print_function - -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] - -__title__ = "packaging" -__summary__ = "Core utilities for Python packages" -__uri__ = "https://github.com/pypa/packaging" - -__version__ = "20.4" - -__author__ = "Donald Stufft and individual contributors" -__email__ = "donald@stufft.io" - -__license__ = "BSD-2-Clause or Apache-2.0" -__copyright__ = "Copyright 2014-2019 %s" % __author__ diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/__init__.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/__init__.py index a0cf67df52..13cadc7f04 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/__init__.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/__init__.py @@ -1,26 +1,15 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. 
-from __future__ import absolute_import, division, print_function -from .__about__ import ( - __author__, - __copyright__, - __email__, - __license__, - __summary__, - __title__, - __uri__, - __version__, -) +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] +__version__ = "23.1" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014-2019 %s" % __author__ diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_compat.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_compat.py deleted file mode 100644 index e54bd4ede8..0000000000 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_compat.py +++ /dev/null @@ -1,38 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. -from __future__ import absolute_import, division, print_function - -import sys - -from ._typing import TYPE_CHECKING - -if TYPE_CHECKING: # pragma: no cover - from typing import Any, Dict, Tuple, Type - - -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - -# flake8: noqa - -if PY3: - string_types = (str,) -else: - string_types = (basestring,) - - -def with_metaclass(meta, *bases): - # type: (Type[Any], Tuple[Type[Any], ...]) -> Any - """ - Create a base class with a metaclass. - """ - # This requires a bit of explanation: the basic idea is to make a dummy - # metaclass for one level of class instantiation that replaces itself with - # the actual metaclass. 
- class metaclass(meta): # type: ignore - def __new__(cls, name, this_bases, d): - # type: (Type[Any], str, Tuple[Any], Dict[Any, Any]) -> Any - return meta(name, bases, d) - - return type.__new__(metaclass, "temporary_class", (), {}) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_elffile.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_elffile.py new file mode 100644 index 0000000000..6fb19b30bb --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +import enum +import os +import struct +from typing import IO, Optional, Tuple + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error: + raise ELFInvalid("unable to parse identification") + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianness). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB. 
+ (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB. + (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or " + f"encoding ({self.encoding})" + ) + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> Optional[str]: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. 
+ continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_manylinux.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_manylinux.py new file mode 100644 index 0000000000..449c655be6 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_manylinux.py @@ -0,0 +1,240 @@ +import collections +import contextlib +import functools +import os +import re +import sys +import warnings +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: + try: + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None + + +def _is_linux_armhf(executable: str) -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) + + +def _have_compatible_abi(executable: str, arch: str) -> bool: + if arch == "armv7l": + return _is_linux_armhf(executable) + if arch == "i686": + return _is_linux_i686(executable) + 
return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> Optional[str]: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # Should be a string like "glibc 2.17". + version_string: str = getattr(os, "confstr")("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.rsplit() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> Optional[str]: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. 
Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> Optional[str]: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. 
+ """ + m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str) + if not m: + warnings.warn( + f"Expected glibc version with 2 components major.minor," + f" got: {version_str}", + RuntimeWarning, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache() +def _get_glibc_version() -> Tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux # noqa + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(linux: str, arch: str) -> Iterator[str]: + if not _have_compatible_abi(sys.executable, arch): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if arch in {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). 
+ too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(tag, arch, glibc_version): + yield linux.replace("linux", tag) + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(legacy_tag, arch, glibc_version): + yield linux.replace("linux", legacy_tag) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_musllinux.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_musllinux.py new file mode 100644 index 0000000000..706ba600a9 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_musllinux.py @@ -0,0 +1,80 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. 
+""" + +import functools +import re +import subprocess +import sys +from typing import Iterator, NamedTuple, Optional + +from ._elffile import ELFFile + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> Optional[_MuslVersion]: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> Optional[_MuslVersion]: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(arch: str) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param arch: Should be the part of platform tag after the ``linux_`` + prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a + prerequisite for the current platform to be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. 
+ return + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_parser.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_parser.py new file mode 100644 index 0000000000..5a18b758fe --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_parser.py @@ -0,0 +1,353 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + 
specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. 
+ if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? 
+ """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? 
+ """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? 
+ """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if ( + env_var == "platform_python_implementation" + or env_var == "python_implementation" + ): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return 
Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_structures.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_structures.py index 800d5c5588..90a6465f96 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_structures.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_structures.py @@ -1,85 +1,60 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function -class InfinityType(object): - def __repr__(self): - # type: () -> str +class InfinityType: + def __repr__(self) -> str: return "Infinity" - def __hash__(self): - # type: () -> int + def __hash__(self) -> int: return hash(repr(self)) - def __lt__(self, other): - # type: (object) -> bool + def __lt__(self, other: object) -> bool: return False - def __le__(self, other): - # type: (object) -> bool + def __le__(self, other: object) -> bool: return False - def __eq__(self, other): - # type: (object) -> bool + def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other): - # type: (object) -> bool - return not isinstance(other, self.__class__) - - def __gt__(self, other): - # type: (object) -> bool + def __gt__(self, other: object) -> bool: return True - def __ge__(self, other): - # type: (object) -> bool + def __ge__(self, other: object) -> bool: return True - def __neg__(self): - # type: (object) -> NegativeInfinityType + def __neg__(self: object) -> "NegativeInfinityType": return NegativeInfinity Infinity = InfinityType() -class NegativeInfinityType(object): - def __repr__(self): - # type: () -> str +class NegativeInfinityType: + def 
__repr__(self) -> str: return "-Infinity" - def __hash__(self): - # type: () -> int + def __hash__(self) -> int: return hash(repr(self)) - def __lt__(self, other): - # type: (object) -> bool + def __lt__(self, other: object) -> bool: return True - def __le__(self, other): - # type: (object) -> bool + def __le__(self, other: object) -> bool: return True - def __eq__(self, other): - # type: (object) -> bool + def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) - def __ne__(self, other): - # type: (object) -> bool - return not isinstance(other, self.__class__) - - def __gt__(self, other): - # type: (object) -> bool + def __gt__(self, other: object) -> bool: return False - def __ge__(self, other): - # type: (object) -> bool + def __ge__(self, other: object) -> bool: return False - def __neg__(self): - # type: (object) -> InfinityType + def __neg__(self: object) -> InfinityType: return Infinity diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_tokenizer.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_tokenizer.py new file mode 100644 index 0000000000..dd0d648d49 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, 
Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. 
+ """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_typing.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/_typing.py deleted file mode 100644 index 77a8b9185a..0000000000 --- 
a/third_party/python/setuptools/pkg_resources/_vendor/packaging/_typing.py +++ /dev/null @@ -1,48 +0,0 @@ -"""For neatly implementing static typing in packaging. - -`mypy` - the static type analysis tool we use - uses the `typing` module, which -provides core functionality fundamental to mypy's functioning. - -Generally, `typing` would be imported at runtime and used in that fashion - -it acts as a no-op at runtime and does not have any run-time overhead by -design. - -As it turns out, `typing` is not vendorable - it uses separate sources for -Python 2/Python 3. Thus, this codebase can not expect it to be present. -To work around this, mypy allows the typing import to be behind a False-y -optional to prevent it from running at runtime and type-comments can be used -to remove the need for the types to be accessible directly during runtime. - -This module provides the False-y guard in a nicely named fashion so that a -curious maintainer can reach here to read this. - -In packaging, all static-typing related imports should be guarded as follows: - - from packaging._typing import TYPE_CHECKING - - if TYPE_CHECKING: - from typing import ... - -Ref: https://github.com/python/mypy/issues/3216 -""" - -__all__ = ["TYPE_CHECKING", "cast"] - -# The TYPE_CHECKING constant defined by the typing module is False at runtime -# but True while type checking. -if False: # pragma: no cover - from typing import TYPE_CHECKING -else: - TYPE_CHECKING = False - -# typing's cast syntax requires calling typing.cast at runtime, but we don't -# want to import typing at runtime. Here, we inform the type checkers that -# we're importing `typing.cast` as `cast` and re-implement typing.cast's -# runtime behavior in a block that is ignored by type checkers. 
-if TYPE_CHECKING: # pragma: no cover - # not executed at runtime - from typing import cast -else: - # executed at runtime - def cast(type_, value): # noqa - return value diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/markers.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/markers.py index fd1559c10e..8b98fca723 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/markers.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/markers.py @@ -1,26 +1,24 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function import operator import os import platform import sys - -from pkg_resources.extern.pyparsing import ParseException, ParseResults, stringStart, stringEnd -from pkg_resources.extern.pyparsing import ZeroOrMore, Group, Forward, QuotedString -from pkg_resources.extern.pyparsing import Literal as L # noqa - -from ._compat import string_types -from ._typing import TYPE_CHECKING -from .specifiers import Specifier, InvalidSpecifier - -if TYPE_CHECKING: # pragma: no cover - from typing import Any, Callable, Dict, List, Optional, Tuple, Union - - Operator = Callable[[str, str], bool] - +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, +) +from ._tokenizer import ParserSyntaxError +from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name __all__ = [ "InvalidMarker", @@ -30,6 +28,8 @@ __all__ = [ "default_environment", ] +Operator = Callable[[str, str], bool] + class InvalidMarker(ValueError): """ @@ -50,111 +50,27 @@ class UndefinedEnvironmentName(ValueError): """ -class Node(object): - def __init__(self, value): - # type: (Any) -> None - 
self.value = value - - def __str__(self): - # type: () -> str - return str(self.value) - - def __repr__(self): - # type: () -> str - return "<{0}({1!r})>".format(self.__class__.__name__, str(self)) - - def serialize(self): - # type: () -> str - raise NotImplementedError - - -class Variable(Node): - def serialize(self): - # type: () -> str - return str(self) - - -class Value(Node): - def serialize(self): - # type: () -> str - return '"{0}"'.format(self) - - -class Op(Node): - def serialize(self): - # type: () -> str - return str(self) - - -VARIABLE = ( - L("implementation_version") - | L("platform_python_implementation") - | L("implementation_name") - | L("python_full_version") - | L("platform_release") - | L("platform_version") - | L("platform_machine") - | L("platform_system") - | L("python_version") - | L("sys_platform") - | L("os_name") - | L("os.name") # PEP-345 - | L("sys.platform") # PEP-345 - | L("platform.version") # PEP-345 - | L("platform.machine") # PEP-345 - | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # undocumented setuptools legacy - | L("extra") # PEP-508 -) -ALIASES = { - "os.name": "os_name", - "sys.platform": "sys_platform", - "platform.version": "platform_version", - "platform.machine": "platform_machine", - "platform.python_implementation": "platform_python_implementation", - "python_implementation": "platform_python_implementation", -} -VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) - -VERSION_CMP = ( - L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") -) - -MARKER_OP = VERSION_CMP | L("not in") | L("in") -MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) - -MARKER_VALUE = QuotedString("'") | QuotedString('"') -MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) - -BOOLOP = L("and") | L("or") - -MARKER_VAR = VARIABLE | MARKER_VALUE - -MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) -MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) - 
-LPAREN = L("(").suppress() -RPAREN = L(")").suppress() - -MARKER_EXPR = Forward() -MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) -MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) - -MARKER = stringStart + MARKER_EXPR + stringEnd - - -def _coerce_parse_result(results): - # type: (Union[ParseResults, List[Any]]) -> List[Any] - if isinstance(results, ParseResults): - return [_coerce_parse_result(i) for i in results] - else: - return results +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results -def _format_marker(marker, first=True): - # type: (Union[List[str], Tuple[Node, ...], str], Optional[bool]) -> str +def _format_marker( + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True +) -> str: - assert isinstance(marker, (list, tuple, string_types)) + assert isinstance(marker, (list, tuple, str)) # Sometimes we have a structure like [[...]] which is a single item list # where the single item is itself it's own list. 
In that case we want skip @@ -179,7 +95,7 @@ def _format_marker(marker, first=True): return marker -_operators = { +_operators: Dict[str, Operator] = { "in": lambda lhs, rhs: lhs in rhs, "not in": lambda lhs, rhs: lhs not in rhs, "<": operator.lt, @@ -188,52 +104,41 @@ _operators = { "!=": operator.ne, ">=": operator.ge, ">": operator.gt, -} # type: Dict[str, Operator] +} -def _eval_op(lhs, op, rhs): - # type: (str, Op, str) -> bool +def _eval_op(lhs: str, op: Op, rhs: str) -> bool: try: spec = Specifier("".join([op.serialize(), rhs])) except InvalidSpecifier: pass else: - return spec.contains(lhs) + return spec.contains(lhs, prereleases=True) - oper = _operators.get(op.serialize()) # type: Optional[Operator] + oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: - raise UndefinedComparison( - "Undefined {0!r} on {1!r} and {2!r}.".format(op, lhs, rhs) - ) + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") return oper(lhs, rhs) -class Undefined(object): - pass - - -_undefined = Undefined() +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) + # other environment markers don't have such standards + return values -def _get_env(environment, name): - # type: (Dict[str, str], str) -> str - value = environment.get(name, _undefined) # type: Union[str, Undefined] - if isinstance(value, Undefined): - raise UndefinedEnvironmentName( - "{0!r} does not exist in evaluation environment.".format(name) - ) - - return value - - -def _evaluate_markers(markers, environment): - # type: (List[Any], Dict[str, str]) -> bool - groups = [[]] # type: List[List[bool]] +def _evaluate_markers(markers: MarkerList, 
environment: Dict[str, str]) -> bool: + groups: List[List[bool]] = [[]] for marker in markers: - assert isinstance(marker, (list, tuple, string_types)) + assert isinstance(marker, (list, tuple, str)) if isinstance(marker, list): groups[-1].append(_evaluate_markers(marker, environment)) @@ -241,12 +146,15 @@ def _evaluate_markers(markers, environment): lhs, op, rhs = marker if isinstance(lhs, Variable): - lhs_value = _get_env(environment, lhs.value) + environment_key = lhs.value + lhs_value = environment[environment_key] rhs_value = rhs.value else: lhs_value = lhs.value - rhs_value = _get_env(environment, rhs.value) + environment_key = rhs.value + rhs_value = environment[environment_key] + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) groups[-1].append(_eval_op(lhs_value, op, rhs_value)) else: assert marker in ["and", "or"] @@ -256,8 +164,7 @@ def _evaluate_markers(markers, environment): return any(all(item) for item in groups) -def format_full_version(info): - # type: (sys._version_info) -> str +def format_full_version(info: "sys._version_info") -> str: version = "{0.major}.{0.minor}.{0.micro}".format(info) kind = info.releaselevel if kind != "final": @@ -265,18 +172,9 @@ def format_full_version(info): return version -def default_environment(): - # type: () -> Dict[str, str] - if hasattr(sys, "implementation"): - # Ignoring the `sys.implementation` reference for type checking due to - # mypy not liking that the attribute doesn't exist in Python 2.7 when - # run with the `--py27` flag. 
- iver = format_full_version(sys.implementation.version) # type: ignore - implementation_name = sys.implementation.name # type: ignore - else: - iver = "0" - implementation_name = "" - +def default_environment() -> Dict[str, str]: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name return { "implementation_name": implementation_name, "implementation_version": iver, @@ -292,27 +190,48 @@ def default_environment(): } -class Marker(object): - def __init__(self, marker): - # type: (str) -> None +class Marker: + def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. try: - self._markers = _coerce_parse_result(MARKER.parseString(marker)) - except ParseException as e: - err_str = "Invalid marker: {0!r}, parse error at {1!r}".format( - marker, marker[e.loc : e.loc + 8] - ) - raise InvalidMarker(err_str) - - def __str__(self): - # type: () -> str + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (<Variable('python_version')>, <Op('>')>, <Value('3.6')>), + # 'and', + # [ + # (<Variable('python_version')>, <Op('==')>, <Value('3.6')>), + # 'or', + # (<Variable('os_name')>, <Op('==')>, <Value('unix')>) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e + + def __str__(self) -> str: return _format_marker(self._markers) - def __repr__(self): - # type: () -> str - return "<Marker({0!r})>".format(str(self)) + def __repr__(self) -> str: + return f"<Marker('{self}')>" + + def __hash__(self) -> int: + return 
hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) - def evaluate(self, environment=None): - # type: (Optional[Dict[str, str]]) -> bool + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: """Evaluate a marker. Return the boolean from evaluating the given marker against the @@ -322,7 +241,12 @@ class Marker(object): The environment is determined from the current Python process. """ current_environment = default_environment() + current_environment["extra"] = "" if environment is not None: current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if current_environment["extra"] is None: + current_environment["extra"] = "" return _evaluate_markers(self._markers, current_environment) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py new file mode 100644 index 0000000000..e76a60c395 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/metadata.py @@ -0,0 +1,408 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import Dict, List, Optional, Tuple, Union, cast + +if sys.version_info >= (3, 8): # pragma: no cover + from typing import TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from typing_extensions import TypedDict + else: + try: + from typing_extensions import TypedDict + except ImportError: + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. 
The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emiting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. 
+ + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. + + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_STRING_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separate keyboards into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in a accumulating dict. + # + # The other potentional issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. 
As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. 
+_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata. + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. 
+ + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. 
In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibacked happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores it's data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. 
+ # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_STRING_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. 
+ try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/requirements.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/requirements.py index 9495a1df1e..f34bfa85c8 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/requirements.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/requirements.py @@ -1,22 +1,14 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. 
-from __future__ import absolute_import, division, print_function -import string -import re +import urllib.parse +from typing import Any, List, Optional, Set -from pkg_resources.extern.pyparsing import stringStart, stringEnd, originalTextFor, ParseException -from pkg_resources.extern.pyparsing import ZeroOrMore, Word, Optional, Regex, Combine -from pkg_resources.extern.pyparsing import Literal as L # noqa -from urllib import parse as urlparse - -from ._typing import TYPE_CHECKING -from .markers import MARKER_EXPR, Marker -from .specifiers import LegacySpecifier, Specifier, SpecifierSet - -if TYPE_CHECKING: # pragma: no cover - from typing import List +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet class InvalidRequirement(ValueError): @@ -25,61 +17,7 @@ class InvalidRequirement(ValueError): """ -ALPHANUM = Word(string.ascii_letters + string.digits) - -LBRACKET = L("[").suppress() -RBRACKET = L("]").suppress() -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() -COMMA = L(",").suppress() -SEMICOLON = L(";").suppress() -AT = L("@").suppress() - -PUNCTUATION = Word("-_.") -IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) -IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) - -NAME = IDENTIFIER("name") -EXTRA = IDENTIFIER - -URI = Regex(r"[^ ]+")("url") -URL = AT + URI - -EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) -EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") - -VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) -VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) - -VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY -VERSION_MANY = Combine( - VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False -)("_raw_spec") -_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY)) 
-_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") - -VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") -VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) - -MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") -MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start : t._original_end]) -) -MARKER_SEPARATOR = SEMICOLON -MARKER = MARKER_SEPARATOR + MARKER_EXPR - -VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) -URL_AND_MARKER = URL + Optional(MARKER) - -NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) - -REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd -# pkg_resources.extern.pyparsing isn't thread safe during initialization, so we do it eagerly, see -# issue #104 -REQUIREMENT.parseString("x[]") - - -class Requirement(object): +class Requirement: """Parse a requirement. Parse a given requirement string into its parts, such as name, specifier, @@ -92,54 +30,66 @@ class Requirement(object): # the thing as well as the version? What about the markers? # TODO: Can we normalize the name and extra name? 
- def __init__(self, requirement_string): - # type: (str) -> None + def __init__(self, requirement_string: str) -> None: try: - req = REQUIREMENT.parseString(requirement_string) - except ParseException as e: - raise InvalidRequirement( - 'Parse error at "{0!r}": {1}'.format( - requirement_string[e.loc : e.loc + 8], e.msg - ) - ) - - self.name = req.name - if req.url: - parsed_url = urlparse.urlparse(req.url) + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + if parsed.url: + parsed_url = urllib.parse.urlparse(parsed.url) if parsed_url.scheme == "file": - if urlparse.urlunparse(parsed_url) != req.url: + if urllib.parse.urlunparse(parsed_url) != parsed.url: raise InvalidRequirement("Invalid URL given") elif not (parsed_url.scheme and parsed_url.netloc) or ( not parsed_url.scheme and not parsed_url.netloc ): - raise InvalidRequirement("Invalid URL: {0}".format(req.url)) - self.url = req.url + raise InvalidRequirement(f"Invalid URL: {parsed.url}") + self.url: Optional[str] = parsed.url else: self.url = None - self.extras = set(req.extras.asList() if req.extras else []) - self.specifier = SpecifierSet(req.specifier) - self.marker = req.marker if req.marker else None + self.extras: Set[str] = set(parsed.extras if parsed.extras else []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) - def __str__(self): - # type: () -> str - parts = [self.name] # type: List[str] + def __str__(self) -> str: + parts: List[str] = [self.name] if self.extras: - parts.append("[{0}]".format(",".join(sorted(self.extras)))) + formatted_extras = ",".join(sorted(self.extras)) + parts.append(f"[{formatted_extras}]") if self.specifier: parts.append(str(self.specifier)) if self.url: - parts.append("@ 
{0}".format(self.url)) + parts.append(f"@ {self.url}") if self.marker: parts.append(" ") if self.marker: - parts.append("; {0}".format(self.marker)) + parts.append(f"; {self.marker}") return "".join(parts) - def __repr__(self): - # type: () -> str - return "<Requirement({0!r})>".format(str(self)) + def __repr__(self) -> str: + return f"<Requirement('{self}')>" + + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + self.name == other.name + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/specifiers.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/specifiers.py index fe09bb1dbb..ba8fe37b7f 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/specifiers.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/specifiers.py @@ -1,339 +1,123 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from __future__ import absolute_import, division, print_function +""" +.. 
testsetup:: + + from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from packaging.version import Version +""" import abc -import functools import itertools import re +from typing import ( + Callable, + Iterable, + Iterator, + List, + Optional, + Set, + Tuple, + TypeVar, + Union, +) -from ._compat import string_types, with_metaclass -from ._typing import TYPE_CHECKING from .utils import canonicalize_version -from .version import Version, LegacyVersion, parse - -if TYPE_CHECKING: # pragma: no cover - from typing import ( - List, - Dict, - Union, - Iterable, - Iterator, - Optional, - Callable, - Tuple, - FrozenSet, - ) +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] - ParsedVersion = Union[Version, LegacyVersion] - UnparsedVersion = Union[Version, LegacyVersion, str] - CallableOperator = Callable[[ParsedVersion, str], bool] + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version class InvalidSpecifier(ValueError): """ - An invalid specifier was found, users should refer to PEP 440. + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' """ -class BaseSpecifier(with_metaclass(abc.ABCMeta, object)): # type: ignore +class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod - def __str__(self): - # type: () -> str + def __str__(self) -> str: """ - Returns the str representation of this Specifier like object. This + Returns the str representation of this Specifier-like object. This should be representative of the Specifier itself. 
""" @abc.abstractmethod - def __hash__(self): - # type: () -> int + def __hash__(self) -> int: """ - Returns a hash value for this Specifier like object. + Returns a hash value for this Specifier-like object. """ @abc.abstractmethod - def __eq__(self, other): - # type: (object) -> bool + def __eq__(self, other: object) -> bool: """ - Returns a boolean representing whether or not the two Specifier like + Returns a boolean representing whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. """ + @property @abc.abstractmethod - def __ne__(self, other): - # type: (object) -> bool - """ - Returns a boolean representing whether or not the two Specifier like - objects are not equal. - """ + def prereleases(self) -> Optional[bool]: + """Whether or not pre-releases as a whole are allowed. - @abc.abstractproperty - def prereleases(self): - # type: () -> Optional[bool] - """ - Returns whether or not pre-releases as a whole are allowed by this - specifier. + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. """ @prereleases.setter - def prereleases(self, value): - # type: (bool) -> None - """ - Sets whether or not pre-releases as a whole are allowed by this - specifier. + def prereleases(self, value: bool) -> None: + """Setter for :attr:`prereleases`. + + :param value: The value to set. """ @abc.abstractmethod - def contains(self, item, prereleases=None): - # type: (str, Optional[bool]) -> bool + def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: """ Determines if the given item is contained within this specifier. 
""" @abc.abstractmethod - def filter(self, iterable, prereleases=None): - # type: (Iterable[UnparsedVersion], Optional[bool]) -> Iterable[UnparsedVersion] + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. """ -class _IndividualSpecifier(BaseSpecifier): - - _operators = {} # type: Dict[str, str] - - def __init__(self, spec="", prereleases=None): - # type: (str, Optional[bool]) -> None - match = self._regex.search(spec) - if not match: - raise InvalidSpecifier("Invalid specifier: '{0}'".format(spec)) - - self._spec = ( - match.group("operator").strip(), - match.group("version").strip(), - ) # type: Tuple[str, str] +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. - # Store whether or not this Specifier should accept prereleases - self._prereleases = prereleases + .. 
tip:: - def __repr__(self): - # type: () -> str - pre = ( - ", prereleases={0!r}".format(self.prereleases) - if self._prereleases is not None - else "" - ) - - return "<{0}({1!r}{2})>".format(self.__class__.__name__, str(self), pre) - - def __str__(self): - # type: () -> str - return "{0}{1}".format(*self._spec) - - @property - def _canonical_spec(self): - # type: () -> Tuple[str, Union[Version, str]] - return self._spec[0], canonicalize_version(self._spec[1]) - - def __hash__(self): - # type: () -> int - return hash(self._canonical_spec) - - def __eq__(self, other): - # type: (object) -> bool - if isinstance(other, string_types): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._canonical_spec == other._canonical_spec - - def __ne__(self, other): - # type: (object) -> bool - if isinstance(other, string_types): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._spec != other._spec - - def _get_operator(self, op): - # type: (str) -> CallableOperator - operator_callable = getattr( - self, "_compare_{0}".format(self._operators[op]) - ) # type: CallableOperator - return operator_callable - - def _coerce_version(self, version): - # type: (UnparsedVersion) -> ParsedVersion - if not isinstance(version, (LegacyVersion, Version)): - version = parse(version) - return version - - @property - def operator(self): - # type: () -> str - return self._spec[0] - - @property - def version(self): - # type: () -> str - return self._spec[1] - - @property - def prereleases(self): - # type: () -> Optional[bool] - return self._prereleases - - @prereleases.setter - def prereleases(self, value): - # type: (bool) -> None - self._prereleases = value - - def __contains__(self, item): - # type: (str) -> bool - return self.contains(item) 
- - def contains(self, item, prereleases=None): - # type: (UnparsedVersion, Optional[bool]) -> bool - - # Determine if prereleases are to be allowed or not. - if prereleases is None: - prereleases = self.prereleases - - # Normalize item to a Version or LegacyVersion, this allows us to have - # a shortcut for ``"2.0" in Specifier(">=2") - normalized_item = self._coerce_version(item) - - # Determine if we should be supporting prereleases in this specifier - # or not, if we do not support prereleases than we can short circuit - # logic if this version is a prereleases. - if normalized_item.is_prerelease and not prereleases: - return False - - # Actually do the comparison to determine if this item is contained - # within this Specifier or not. - operator_callable = self._get_operator(self.operator) # type: CallableOperator - return operator_callable(normalized_item, self.version) - - def filter(self, iterable, prereleases=None): - # type: (Iterable[UnparsedVersion], Optional[bool]) -> Iterable[UnparsedVersion] - - yielded = False - found_prereleases = [] - - kw = {"prereleases": prereleases if prereleases is not None else True} - - # Attempt to iterate over all the values in the iterable and if any of - # them match, yield them. - for version in iterable: - parsed_version = self._coerce_version(version) - - if self.contains(parsed_version, **kw): - # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later incase nothing - # else matches this specifier. - if parsed_version.is_prerelease and not ( - prereleases or self.prereleases - ): - found_prereleases.append(version) - # Either this is not a prerelease, or we should have been - # accepting prereleases from the beginning. - else: - yielded = True - yield version - - # Now that we've iterated over everything, determine if we've yielded - # any values, and if we have not and we have any prereleases stored up - # then we will go ahead and yield the prereleases. 
- if not yielded and found_prereleases: - for version in found_prereleases: - yield version - - -class LegacySpecifier(_IndividualSpecifier): - - _regex_str = r""" - (?P<operator>(==|!=|<=|>=|<|>)) - \s* - (?P<version> - [^,;\s)]* # Since this is a "legacy" specifier, and the version - # string can be just about anything, we match everything - # except for whitespace, a semi-colon for marker support, - # a closing paren since versions can be enclosed in - # them, and a comma since it's a version separator. - ) - """ - - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) - - _operators = { - "==": "equal", - "!=": "not_equal", - "<=": "less_than_equal", - ">=": "greater_than_equal", - "<": "less_than", - ">": "greater_than", - } - - def _coerce_version(self, version): - # type: (Union[ParsedVersion, str]) -> LegacyVersion - if not isinstance(version, LegacyVersion): - version = LegacyVersion(str(version)) - return version - - def _compare_equal(self, prospective, spec): - # type: (LegacyVersion, str) -> bool - return prospective == self._coerce_version(spec) - - def _compare_not_equal(self, prospective, spec): - # type: (LegacyVersion, str) -> bool - return prospective != self._coerce_version(spec) - - def _compare_less_than_equal(self, prospective, spec): - # type: (LegacyVersion, str) -> bool - return prospective <= self._coerce_version(spec) - - def _compare_greater_than_equal(self, prospective, spec): - # type: (LegacyVersion, str) -> bool - return prospective >= self._coerce_version(spec) - - def _compare_less_than(self, prospective, spec): - # type: (LegacyVersion, str) -> bool - return prospective < self._coerce_version(spec) - - def _compare_greater_than(self, prospective, spec): - # type: (LegacyVersion, str) -> bool - return prospective > self._coerce_version(spec) - - -def _require_version_compare( - fn # type: (Callable[[Specifier, ParsedVersion, str], bool]) -): - # type: (...) 
-> Callable[[Specifier, ParsedVersion, str], bool] - @functools.wraps(fn) - def wrapped(self, prospective, spec): - # type: (Specifier, ParsedVersion, str) -> bool - if not isinstance(prospective, Version): - return False - return fn(self, prospective, spec) - - return wrapped - - -class Specifier(_IndividualSpecifier): + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ - _regex_str = r""" + _operator_regex_str = r""" (?P<operator>(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" (?P<version> (?: # The identity operators allow for an escape hatch that will @@ -343,8 +127,10 @@ class Specifier(_IndividualSpecifier): # but included entirely as an escape hatch. (?<====) # Only match for the identity operator \s* - [^\s]* # We just match everything, except for whitespace - # since we are only testing for strict identity. + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. ) | (?: @@ -357,23 +143,23 @@ class Specifier(_IndividualSpecifier): v? (?:[0-9]+!)? # epoch [0-9]+(?:\.[0-9]+)* # release - (?: # pre release - [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) - [-_\.]? - [0-9]* - )? - (?: # post release - (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) - )? - # You cannot use a wild card and a dev or local version - # together so group them with a | and make them optional. + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? 
(?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local - | - \.\* # Wild card syntax of .* )? ) | @@ -388,7 +174,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -413,7 +199,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)* # release (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -425,7 +211,10 @@ class Specifier(_IndividualSpecifier): ) """ - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$", + re.VERBOSE | re.IGNORECASE, + ) _operators = { "~=": "compatible", @@ -438,9 +227,153 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } - @_require_version_compare - def _compare_compatible(self, prospective, spec): - # type: (ParsedVersion, str) -> bool + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). 
+ """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. 
+ + >>> Specifier('>=1.0.0') + <Specifier('>=1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + <Specifier('>=1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + <Specifier('>=1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... 
Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to @@ -449,15 +382,9 @@ class Specifier(_IndividualSpecifier): # the other specifiers. # We want everything but the last item in the version, but we want to - # ignore post and dev releases and we want to treat the pre-release as - # it's own separate segment. + # ignore suffix segments. prefix = ".".join( - list( - itertools.takewhile( - lambda x: (not x.startswith("post") and not x.startswith("dev")), - _version_split(spec), - ) - )[:-1] + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] ) # Add the prefix notation to the end of our string @@ -467,35 +394,35 @@ class Specifier(_IndividualSpecifier): prospective, prefix ) - @_require_version_compare - def _compare_equal(self, prospective, spec): - # type: (ParsedVersion, str) -> bool + def _compare_equal(self, prospective: Version, spec: str) -> bool: # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. 
- prospective = Version(prospective.public) + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) # Split the spec out by dots, and pretend that there is an implicit # dot in between a release segment and a pre-release segment. - split_spec = _version_split(spec[:-2]) # Remove the trailing .* + split_spec = _version_split(normalized_spec) # Split the prospective version out by dots, and pretend that there # is an implicit dot in between a release segment and a pre-release # segment. - split_prospective = _version_split(str(prospective)) + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - shortened_prospective = split_prospective[: len(split_spec)] - - # Pad out our two sides with zeros so that they both equal the same - # length. 
- padded_spec, padded_prospective = _pad_version( - split_spec, shortened_prospective - ) + shortened_prospective = padded_prospective[: len(split_spec)] - return padded_prospective == padded_spec + return shortened_prospective == split_spec else: # Convert our spec string into a Version spec_version = Version(spec) @@ -508,32 +435,24 @@ class Specifier(_IndividualSpecifier): return prospective == spec_version - @_require_version_compare - def _compare_not_equal(self, prospective, spec): - # type: (ParsedVersion, str) -> bool + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: return not self._compare_equal(prospective, spec) - @_require_version_compare - def _compare_less_than_equal(self, prospective, spec): - # type: (ParsedVersion, str) -> bool + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) <= Version(spec) - @_require_version_compare - def _compare_greater_than_equal(self, prospective, spec): - # type: (ParsedVersion, str) -> bool + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) >= Version(spec) - @_require_version_compare - def _compare_less_than(self, prospective, spec_str): - # type: (ParsedVersion, str) -> bool + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -558,9 +477,7 @@ class Specifier(_IndividualSpecifier): # version in the spec. 
return True - @_require_version_compare - def _compare_greater_than(self, prospective, spec_str): - # type: (ParsedVersion, str) -> bool + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -591,48 +508,143 @@ class Specifier(_IndividualSpecifier): # same version in the spec. return True - def _compare_arbitrary(self, prospective, spec): - # type: (Version, str) -> bool + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() - @property - def prereleases(self): - # type: () -> bool + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. - # If there is an explicit prereleases set for this, then we'll just - # blindly use that. - if self._prereleases is not None: - return self._prereleases + :param item: The item to check for. - # Look at all of our specifiers and determine if they are inclusive - # operators, and if they are if they are including an explicit - # prerelease. - operator, version = self._spec - if operator in ["==", ">=", "<=", "~=", "==="]: - # The == specifier can include a trailing .*, if it does we - # want to remove before parsing. - if operator == "==" and version.endswith(".*"): - version = version[:-2] + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. - # Parse the version, and if it is a pre-release than this - # specifier allows pre-releases. 
- if parse(version).is_prerelease: - return True + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) - return False + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ - @prereleases.setter - def prereleases(self, value): - # type: (bool) -> None - self._prereleases = value + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. 
+ operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', <Version('1.4')>] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. 
+ if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") -def _version_split(version): - # type: (str) -> List[str] - result = [] # type: List[str] +def _version_split(version: str) -> List[str]: + result: List[str] = [] for item in version.split("."): match = _prefix_regex.search(item) if match: @@ -642,8 +654,13 @@ def _version_split(version): return result -def _pad_version(left, right): - # type: (List[str], List[str]) -> Tuple[List[str], List[str]] +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]: left_split, right_split = [], [] # Get the release segment of our versions @@ -662,21 +679,39 @@ def _pad_version(left, right): class SpecifierSet(BaseSpecifier): - def __init__(self, specifiers="", prereleases=None): - # type: (str, Optional[bool]) -> None + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ - # Split on , to break each individual specifier into it's own item, and + def __init__( + self, specifiers: str = "", prereleases: Optional[bool] = None + ) -> None: + """Initialize a SpecifierSet instance. 
+ + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ + + # Split on `,` to break each individual specifier into it's own item, and # strip each item to remove leading/trailing whitespace. split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] # Parsed each individual specifier, attempting first to make it a - # Specifier and falling back to a LegacySpecifier. - parsed = set() + # Specifier. + parsed: Set[Specifier] = set() for specifier in split_specifiers: - try: - parsed.add(Specifier(specifier)) - except InvalidSpecifier: - parsed.add(LegacySpecifier(specifier)) + parsed.add(Specifier(specifier)) # Turn our parsed specifiers into a frozen set and save them for later. self._specs = frozenset(parsed) @@ -685,27 +720,75 @@ class SpecifierSet(BaseSpecifier): # we accept prereleases or not. self._prereleases = prereleases - def __repr__(self): - # type: () -> str + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. 
+ return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + <SpecifierSet('!=2.0.0,>=1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + <SpecifierSet('!=2.0.0,>=1.0.0', prereleases=True)> + """ pre = ( - ", prereleases={0!r}".format(self.prereleases) + f", prereleases={self.prereleases!r}" if self._prereleases is not None else "" ) - return "<SpecifierSet({0!r}{1})>".format(str(self), pre) + return f"<SpecifierSet({str(self)!r}{pre})>" - def __str__(self): - # type: () -> str + def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ return ",".join(sorted(str(s) for s in self._specs)) - def __hash__(self): - # type: () -> int + def __hash__(self) -> int: return hash(self._specs) - def __and__(self, other): - # type: (Union[SpecifierSet, str]) -> SpecifierSet - if isinstance(other, string_types): + def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. 
+ + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + <SpecifierSet('!=1.0.1,!=2.0.1,<=2.0.0,>=1.0.0')> + """ + if isinstance(other, str): other = SpecifierSet(other) elif not isinstance(other, SpecifierSet): return NotImplemented @@ -727,66 +810,99 @@ class SpecifierSet(BaseSpecifier): return specifier - def __eq__(self, other): - # type: (object) -> bool - if isinstance(other, (string_types, _IndividualSpecifier)): - other = SpecifierSet(str(other)) - elif not isinstance(other, SpecifierSet): - return NotImplemented + def __eq__(self, other: object) -> bool: + """Whether or not the two SpecifierSet-like objects are equal. - return self._specs == other._specs + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. - def __ne__(self, other): - # type: (object) -> bool - if isinstance(other, (string_types, _IndividualSpecifier)): + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... 
SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): other = SpecifierSet(str(other)) elif not isinstance(other, SpecifierSet): return NotImplemented - return self._specs != other._specs + return self._specs == other._specs - def __len__(self): - # type: () -> int + def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" return len(self._specs) - def __iter__(self): - # type: () -> Iterator[FrozenSet[_IndividualSpecifier]] - return iter(self._specs) - - @property - def prereleases(self): - # type: () -> Optional[bool] + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. - # If we have been given an explicit prerelease modifier, then we'll - # pass that through here. - if self._prereleases is not None: - return self._prereleases + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [<Specifier('!=1.0.1')>, <Specifier('>=1.0.0')>] + """ + return iter(self._specs) - # If we don't have any specifiers, and we don't have a forced value, - # then we'll just return None since we don't know if this should have - # pre-releases or not. - if not self._specs: - return None + def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. - # Otherwise we'll see if any of the given specifiers accept - # prereleases, if any of them do we'll return True, otherwise False. - return any(s.prereleases for s in self._specs) + :param item: The item to check for. 
- @prereleases.setter - def prereleases(self, value): - # type: (bool) -> None - self._prereleases = value + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. - def __contains__(self, item): - # type: (Union[ParsedVersion, str]) -> bool + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ return self.contains(item) - def contains(self, item, prereleases=None): - # type: (Union[ParsedVersion, str], Optional[bool]) -> bool - - # Ensure that our item is a Version or LegacyVersion instance. - if not isinstance(item, (LegacyVersion, Version)): - item = parse(item) + def contains( + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, + ) -> bool: + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. 
+ if not isinstance(item, Version): + item = Version(item) # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the @@ -803,6 +919,9 @@ class SpecifierSet(BaseSpecifier): if not prereleases and item.is_prerelease: return False + if installed and item.is_prerelease: + item = Version(item.base_version) + # We simply dispatch to the underlying specs here to make sure that the # given version is contained within all of them. # Note: This use of all() here means that an empty set of specifiers @@ -810,12 +929,46 @@ class SpecifierSet(BaseSpecifier): return all(s.contains(item, prereleases=prereleases) for s in self._specs) def filter( - self, - iterable, # type: Iterable[Union[ParsedVersion, str]] - prereleases=None, # type: Optional[bool] - ): - # type: (...) -> Iterable[Union[ParsedVersion, str]] - + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. 
+ + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', <Version('1.4')>] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the # SpecifierSet thinks for whether or not we should support prereleases. @@ -828,24 +981,16 @@ class SpecifierSet(BaseSpecifier): if self._specs: for spec in self._specs: iterable = spec.filter(iterable, prereleases=bool(prereleases)) - return iterable + return iter(iterable) # If we do not have any specifiers, then we need to have a rough filter # which will filter out any pre-releases, unless there are no final - # releases, and which will filter out LegacyVersion in general. + # releases. else: - filtered = [] # type: List[Union[ParsedVersion, str]] - found_prereleases = [] # type: List[Union[ParsedVersion, str]] + filtered: List[UnparsedVersionVar] = [] + found_prereleases: List[UnparsedVersionVar] = [] for item in iterable: - # Ensure that we some kind of Version class for this item. 
- if not isinstance(item, (LegacyVersion, Version)): - parsed_version = parse(item) - else: - parsed_version = item - - # Filter out any item which is parsed as a LegacyVersion - if isinstance(parsed_version, LegacyVersion): - continue + parsed_version = _coerce_version(item) # Store any item which is a pre-release for later unless we've # already found a final version or we are accepting prereleases @@ -858,6 +1003,6 @@ class SpecifierSet(BaseSpecifier): # If we've found no items except for pre-releases, then we'll go # ahead and use the pre-releases if not filtered and found_prereleases and prereleases is None: - return found_prereleases + return iter(found_prereleases) - return filtered + return iter(filtered) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/tags.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/tags.py index 9064910b8b..76d243414d 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/tags.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/tags.py @@ -2,62 +2,45 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. 
-from __future__ import absolute_import - -import distutils.util - -try: - from importlib.machinery import EXTENSION_SUFFIXES -except ImportError: # pragma: no cover - import imp - - EXTENSION_SUFFIXES = [x[0] for x in imp.get_suffixes()] - del imp import logging -import os import platform -import re -import struct +import subprocess import sys import sysconfig -import warnings - -from ._typing import TYPE_CHECKING, cast - -if TYPE_CHECKING: # pragma: no cover - from typing import ( - Dict, - FrozenSet, - IO, - Iterable, - Iterator, - List, - Optional, - Sequence, - Tuple, - Union, - ) - - PythonVersion = Sequence[int] - MacVersion = Tuple[int, int] - GlibcVersion = Tuple[int, int] - +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from . import _manylinux, _musllinux logger = logging.getLogger(__name__) -INTERPRETER_SHORT_NAMES = { +PythonVersion = Sequence[int] +MacVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: Dict[str, str] = { "python": "py", # Generic. "cpython": "cp", "pypy": "pp", "ironpython": "ip", "jython": "jy", -} # type: Dict[str, str] +} -_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 +_32_BIT_INTERPRETER = sys.maxsize <= 2**32 -class Tag(object): +class Tag: """ A representation of the tag triple for a wheel. @@ -65,55 +48,53 @@ class Tag(object): is also supported. 
""" - __slots__ = ["_interpreter", "_abi", "_platform"] + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] - def __init__(self, interpreter, abi, platform): - # type: (str, str, str) -> None + def __init__(self, interpreter: str, abi: str, platform: str) -> None: self._interpreter = interpreter.lower() self._abi = abi.lower() self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) @property - def interpreter(self): - # type: () -> str + def interpreter(self) -> str: return self._interpreter @property - def abi(self): - # type: () -> str + def abi(self) -> str: return self._abi @property - def platform(self): - # type: () -> str + def platform(self) -> str: return self._platform - def __eq__(self, other): - # type: (object) -> bool + def __eq__(self, other: object) -> bool: if not isinstance(other, Tag): return NotImplemented return ( - (self.platform == other.platform) - and (self.abi == other.abi) - and (self.interpreter == other.interpreter) + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. 
+ and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) ) - def __hash__(self): - # type: () -> int - return hash((self._interpreter, self._abi, self._platform)) + def __hash__(self) -> int: + return self._hash - def __str__(self): - # type: () -> str - return "{}-{}-{}".format(self._interpreter, self._abi, self._platform) + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" - def __repr__(self): - # type: () -> str - return "<{self} @ {self_id}>".format(self=self, self_id=id(self)) + def __repr__(self) -> str: + return f"<{self} @ {id(self)}>" -def parse_tag(tag): - # type: (str) -> FrozenSet[Tag] +def parse_tag(tag: str) -> FrozenSet[Tag]: """ Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. @@ -129,25 +110,8 @@ def parse_tag(tag): return frozenset(tags) -def _warn_keyword_parameter(func_name, kwargs): - # type: (str, Dict[str, bool]) -> bool - """ - Backwards-compatibility with Python 2.7 to allow treating 'warn' as keyword-only. 
- """ - if not kwargs: - return False - elif len(kwargs) > 1 or "warn" not in kwargs: - kwargs.pop("warn", None) - arg = next(iter(kwargs.keys())) - raise TypeError( - "{}() got an unexpected keyword argument {!r}".format(func_name, arg) - ) - return kwargs["warn"] - - -def _get_config_var(name, warn=False): - # type: (str, bool) -> Union[int, str, None] - value = sysconfig.get_config_var(name) +def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: + value: Union[int, str, None] = sysconfig.get_config_var(name) if value is None and warn: logger.debug( "Config variable '%s' is unset, Python ABI tag may be incorrect", name @@ -155,13 +119,11 @@ def _get_config_var(name, warn=False): return value -def _normalize_string(string): - # type: (str) -> str - return string.replace(".", "_").replace("-", "_") +def _normalize_string(string: str) -> str: + return string.replace(".", "_").replace("-", "_").replace(" ", "_") -def _abi3_applies(python_version): - # type: (PythonVersion) -> bool +def _abi3_applies(python_version: PythonVersion) -> bool: """ Determine if the Python version supports abi3. @@ -170,8 +132,7 @@ def _abi3_applies(python_version): return len(python_version) > 1 and tuple(python_version) >= (3, 2) -def _cpython_abis(py_version, warn=False): - # type: (PythonVersion, bool) -> List[str] +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: py_version = tuple(py_version) # To allow for version comparison. abis = [] version = _version_nodot(py_version[:2]) @@ -197,7 +158,7 @@ def _cpython_abis(py_version, warn=False): elif debug: # Debug builds can also load "normal" extension modules. # We can also assume no UCS-4 or pymalloc requirement. 
- abis.append("cp{version}".format(version=version)) + abis.append(f"cp{version}") abis.insert( 0, "cp{version}{debug}{pymalloc}{ucs4}".format( @@ -208,12 +169,12 @@ def _cpython_abis(py_version, warn=False): def cpython_tags( - python_version=None, # type: Optional[PythonVersion] - abis=None, # type: Optional[Iterable[str]] - platforms=None, # type: Optional[Iterable[str]] - **kwargs # type: bool -): - # type: (...) -> Iterator[Tag] + python_version: Optional[PythonVersion] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: """ Yields the tags for a CPython interpreter. @@ -229,11 +190,10 @@ def cpython_tags( If 'abi3' or 'none' are specified in 'abis' then they will be yielded at their normal position and not at the beginning. """ - warn = _warn_keyword_parameter("cpython_tags", kwargs) if not python_version: python_version = sys.version_info[:2] - interpreter = "cp{}".format(_version_nodot(python_version[:2])) + interpreter = f"cp{_version_nodot(python_version[:2])}" if abis is None: if len(python_version) > 1: @@ -248,15 +208,13 @@ def cpython_tags( except ValueError: pass - platforms = list(platforms or _platform_tags()) + platforms = list(platforms or platform_tags()) for abi in abis: for platform_ in platforms: yield Tag(interpreter, abi, platform_) if _abi3_applies(python_version): - for tag in (Tag(interpreter, "abi3", platform_) for platform_ in platforms): - yield tag - for tag in (Tag(interpreter, "none", platform_) for platform_ in platforms): - yield tag + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) if _abi3_applies(python_version): for minor_version in range(python_version[1] - 1, 1, -1): @@ -267,20 +225,54 @@ def cpython_tags( yield Tag(interpreter, "abi3", platform_) -def _generic_abi(): - # type: () -> Iterator[str] - abi = 
sysconfig.get_config_var("SOABI") - if abi: - yield _normalize_string(abi) +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] def generic_tags( - interpreter=None, # type: Optional[str] - abis=None, # type: Optional[Iterable[str]] - platforms=None, # type: Optional[Iterable[str]] - **kwargs # type: bool -): - # type: (...) -> Iterator[Tag] + interpreter: Optional[str] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: """ Yields the tags for a generic interpreter. @@ -289,15 +281,15 @@ def generic_tags( The "none" ABI will be added if it was not explicitly provided. 
""" - warn = _warn_keyword_parameter("generic_tags", kwargs) if not interpreter: interp_name = interpreter_name() interp_version = interpreter_version(warn=warn) interpreter = "".join([interp_name, interp_version]) if abis is None: abis = _generic_abi() - platforms = list(platforms or _platform_tags()) - abis = list(abis) + else: + abis = list(abis) + platforms = list(platforms or platform_tags()) if "none" not in abis: abis.append("none") for abi in abis: @@ -305,8 +297,7 @@ def generic_tags( yield Tag(interpreter, abi, platform_) -def _py_interpreter_range(py_version): - # type: (PythonVersion) -> Iterator[str] +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: """ Yields Python versions in descending order. @@ -314,19 +305,18 @@ def _py_interpreter_range(py_version): all previous versions of that major version. """ if len(py_version) > 1: - yield "py{version}".format(version=_version_nodot(py_version[:2])) - yield "py{major}".format(major=py_version[0]) + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" if len(py_version) > 1: for minor in range(py_version[1] - 1, -1, -1): - yield "py{version}".format(version=_version_nodot((py_version[0], minor))) + yield f"py{_version_nodot((py_version[0], minor))}" def compatible_tags( - python_version=None, # type: Optional[PythonVersion] - interpreter=None, # type: Optional[str] - platforms=None, # type: Optional[Iterable[str]] -): - # type: (...) -> Iterator[Tag] + python_version: Optional[PythonVersion] = None, + interpreter: Optional[str] = None, + platforms: Optional[Iterable[str]] = None, +) -> Iterator[Tag]: """ Yields the sequence of tags that are compatible with a specific version of Python. 
@@ -337,7 +327,7 @@ def compatible_tags( """ if not python_version: python_version = sys.version_info[:2] - platforms = list(platforms or _platform_tags()) + platforms = list(platforms or platform_tags()) for version in _py_interpreter_range(python_version): for platform_ in platforms: yield Tag(version, "none", platform_) @@ -347,8 +337,7 @@ def compatible_tags( yield Tag(version, "none", "any") -def _mac_arch(arch, is_32bit=_32_BIT_INTERPRETER): - # type: (str, bool) -> str +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: if not is_32bit: return arch @@ -358,8 +347,7 @@ def _mac_arch(arch, is_32bit=_32_BIT_INTERPRETER): return "i386" -def _mac_binary_formats(version, cpu_arch): - # type: (MacVersion, str) -> List[str] +def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]: formats = [cpu_arch] if cpu_arch == "x86_64": if version < (10, 4): @@ -382,12 +370,18 @@ def _mac_binary_formats(version, cpu_arch): return [] formats.extend(["fat32", "fat"]) - formats.append("universal") + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + return formats -def mac_platforms(version=None, arch=None): - # type: (Optional[MacVersion], Optional[str]) -> Iterator[str] +def mac_platforms( + version: Optional[MacVersion] = None, arch: Optional[str] = None +) -> Iterator[str]: """ Yields the platform tags for a macOS system. @@ -396,292 +390,101 @@ def mac_platforms(version=None, arch=None): generate platform tags for. Both parameters default to the appropriate value for the current system. """ - version_str, _, cpu_arch = platform.mac_ver() # type: ignore + version_str, _, cpu_arch = platform.mac_ver() if version is None: version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. 
+ version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + universal_newlines=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) else: version = version if arch is None: arch = _mac_arch(cpu_arch) else: arch = arch - for minor_version in range(version[1], -1, -1): - compat_version = version[0], minor_version - binary_formats = _mac_binary_formats(compat_version, arch) - for binary_format in binary_formats: - yield "macosx_{major}_{minor}_{binary_format}".format( - major=compat_version[0], - minor=compat_version[1], - binary_format=binary_format, - ) - - -# From PEP 513. -def _is_manylinux_compatible(name, glibc_version): - # type: (str, GlibcVersion) -> bool - # Check for presence of _manylinux module. - try: - import _manylinux # noqa - return bool(getattr(_manylinux, name + "_compatible")) - except (ImportError, AttributeError): - # Fall through to heuristic check below. - pass - - return _have_compatible_glibc(*glibc_version) - - -def _glibc_version_string(): - # type: () -> Optional[str] - # Returns glibc version string, or None if not using glibc. - return _glibc_version_string_confstr() or _glibc_version_string_ctypes() - - -def _glibc_version_string_confstr(): - # type: () -> Optional[str] - """ - Primary implementation of glibc_version_string using os.confstr. - """ - # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely - # to be broken or missing. This strategy is used in the standard library - # platform module. - # https://github.com/python/cpython/blob/fcf1d003bf4f0100c9d0921ff3d70e1127ca1b71/Lib/platform.py#L175-L183 - try: - # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". 
- version_string = os.confstr( # type: ignore[attr-defined] # noqa: F821 - "CS_GNU_LIBC_VERSION" - ) - assert version_string is not None - _, version = version_string.split() # type: Tuple[str, str] - except (AssertionError, AttributeError, OSError, ValueError): - # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... - return None - return version + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) -def _glibc_version_string_ctypes(): - # type: () -> Optional[str] - """ - Fallback implementation of glibc_version_string using ctypes. - """ - try: - import ctypes - except ImportError: - return None - - # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen - # manpage says, "If filename is NULL, then the returned handle is for the - # main program". This way we can let the linker do the work to figure out - # which libc our process is actually using. - # - # Note: typeshed is wrong here so we are ignoring this line. 
- process_namespace = ctypes.CDLL(None) # type: ignore - try: - gnu_get_libc_version = process_namespace.gnu_get_libc_version - except AttributeError: - # Symbol doesn't exist -> therefore, we are not linked to - # glibc. - return None - - # Call gnu_get_libc_version, which returns a string like "2.5" - gnu_get_libc_version.restype = ctypes.c_char_p - version_str = gnu_get_libc_version() # type: str - # py2 / py3 compatibility: - if not isinstance(version_str, str): - version_str = version_str.decode("ascii") - - return version_str - - -# Separated out from have_compatible_glibc for easier unit testing. -def _check_glibc_version(version_str, required_major, minimum_minor): - # type: (str, int, int) -> bool - # Parse string and check against requested version. - # - # We use a regexp instead of str.split because we want to discard any - # random junk that might come after the minor version -- this might happen - # in patched/forked versions of glibc (e.g. Linaro's version of glibc - # uses version strings like "2.20-2014.11"). See gh-3588. - m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str) - if not m: - warnings.warn( - "Expected glibc version with 2 components major.minor," - " got: %s" % version_str, - RuntimeWarning, - ) - return False - return ( - int(m.group("major")) == required_major - and int(m.group("minor")) >= minimum_minor - ) + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. 
+ if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) -def _have_compatible_glibc(required_major, minimum_minor): - # type: (int, int) -> bool - version_str = _glibc_version_string() - if version_str is None: - return False - return _check_glibc_version(version_str, required_major, minimum_minor) - - -# Python does not provide platform information at sufficient granularity to -# identify the architecture of the running executable in some cases, so we -# determine it dynamically by reading the information from the running -# process. This only applies on Linux, which uses the ELF format. -class _ELFFileHeader(object): - # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header - class _InvalidELFFileHeader(ValueError): - """ - An invalid ELF file header was found. 
- """ - - ELF_MAGIC_NUMBER = 0x7F454C46 - ELFCLASS32 = 1 - ELFCLASS64 = 2 - ELFDATA2LSB = 1 - ELFDATA2MSB = 2 - EM_386 = 3 - EM_S390 = 22 - EM_ARM = 40 - EM_X86_64 = 62 - EF_ARM_ABIMASK = 0xFF000000 - EF_ARM_ABI_VER5 = 0x05000000 - EF_ARM_ABI_FLOAT_HARD = 0x00000400 - - def __init__(self, file): - # type: (IO[bytes]) -> None - def unpack(fmt): - # type: (str) -> int - try: - (result,) = struct.unpack( - fmt, file.read(struct.calcsize(fmt)) - ) # type: (int, ) - except struct.error: - raise _ELFFileHeader._InvalidELFFileHeader() - return result - - self.e_ident_magic = unpack(">I") - if self.e_ident_magic != self.ELF_MAGIC_NUMBER: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_class = unpack("B") - if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_data = unpack("B") - if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_version = unpack("B") - self.e_ident_osabi = unpack("B") - self.e_ident_abiversion = unpack("B") - self.e_ident_pad = file.read(7) - format_h = "<H" if self.e_ident_data == self.ELFDATA2LSB else ">H" - format_i = "<I" if self.e_ident_data == self.ELFDATA2LSB else ">I" - format_q = "<Q" if self.e_ident_data == self.ELFDATA2LSB else ">Q" - format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q - self.e_type = unpack(format_h) - self.e_machine = unpack(format_h) - self.e_version = unpack(format_i) - self.e_entry = unpack(format_p) - self.e_phoff = unpack(format_p) - self.e_shoff = unpack(format_p) - self.e_flags = unpack(format_i) - self.e_ehsize = unpack(format_h) - self.e_phentsize = unpack(format_h) - self.e_phnum = unpack(format_h) - self.e_shentsize = unpack(format_h) - self.e_shnum = unpack(format_h) - self.e_shstrndx = unpack(format_h) - - -def _get_elf_header(): - # type: () -> Optional[_ELFFileHeader] - try: - with open(sys.executable, "rb") as f: - elf_header 
= _ELFFileHeader(f) - except (IOError, OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): - return None - return elf_header - - -def _is_linux_armhf(): - # type: () -> bool - # hard-float ABI can be detected from the ELF header of the running - # process - # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_ARM - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABIMASK - ) == elf_header.EF_ARM_ABI_VER5 - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD - ) == elf_header.EF_ARM_ABI_FLOAT_HARD - return result - - -def _is_linux_i686(): - # type: () -> bool - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_386 - return result - - -def _have_compatible_manylinux_abi(arch): - # type: (str) -> bool - if arch == "armv7l": - return _is_linux_armhf() - if arch == "i686": - return _is_linux_i686() - return True - - -def _linux_platforms(is_32bit=_32_BIT_INTERPRETER): - # type: (bool) -> Iterator[str] - linux = _normalize_string(distutils.util.get_platform()) +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) if is_32bit: if linux == "linux_x86_64": linux = "linux_i686" elif linux == "linux_aarch64": linux = "linux_armv7l" - manylinux_support = [] _, arch = linux.split("_", 1) - if _have_compatible_manylinux_abi(arch): - if arch in {"x86_64", "i686", "aarch64", "armv7l", "ppc64", "ppc64le", "s390x"}: - manylinux_support.append( - ("manylinux2014", (2, 17)) - ) # CentOS 7 w/ glibc 2.17 (PEP 599) - if arch in {"x86_64", "i686"}: - 
manylinux_support.append( - ("manylinux2010", (2, 12)) - ) # CentOS 6 w/ glibc 2.12 (PEP 571) - manylinux_support.append( - ("manylinux1", (2, 5)) - ) # CentOS 5 w/ glibc 2.5 (PEP 513) - manylinux_support_iter = iter(manylinux_support) - for name, glibc_version in manylinux_support_iter: - if _is_manylinux_compatible(name, glibc_version): - yield linux.replace("linux", name) - break - # Support for a later manylinux implies support for an earlier version. - for name, _ in manylinux_support_iter: - yield linux.replace("linux", name) + yield from _manylinux.platform_tags(linux, arch) + yield from _musllinux.platform_tags(arch) yield linux -def _generic_platforms(): - # type: () -> Iterator[str] - yield _normalize_string(distutils.util.get_platform()) +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) -def _platform_tags(): - # type: () -> Iterator[str] +def platform_tags() -> Iterator[str]: """ Provides the platform tags for this installation. """ @@ -693,25 +496,21 @@ def _platform_tags(): return _generic_platforms() -def interpreter_name(): - # type: () -> str +def interpreter_name() -> str: """ Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. """ - try: - name = sys.implementation.name # type: ignore - except AttributeError: # pragma: no cover - # Python 2.7 compatibility. - name = platform.python_implementation().lower() + name = sys.implementation.name return INTERPRETER_SHORT_NAMES.get(name) or name -def interpreter_version(**kwargs): - # type: (bool) -> str +def interpreter_version(*, warn: bool = False) -> str: """ Returns the version of the running interpreter. 
""" - warn = _warn_keyword_parameter("interpreter_version", kwargs) version = _get_config_var("py_version_nodot", warn=warn) if version: version = str(version) @@ -720,32 +519,28 @@ def interpreter_version(**kwargs): return version -def _version_nodot(version): - # type: (PythonVersion) -> str - if any(v >= 10 for v in version): - sep = "_" - else: - sep = "" - return sep.join(map(str, version)) +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) -def sys_tags(**kwargs): - # type: (bool) -> Iterator[Tag] +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: """ Returns the sequence of tag triples for the running interpreter. The order of the sequence corresponds to priority order for the interpreter, from most to least important. """ - warn = _warn_keyword_parameter("sys_tags", kwargs) interp_name = interpreter_name() if interp_name == "cp": - for tag in cpython_tags(warn=warn): - yield tag + yield from cpython_tags(warn=warn) else: - for tag in generic_tags(): - yield tag + yield from generic_tags() - for tag in compatible_tags(): - yield tag + if interp_name == "pp": + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) + else: + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/utils.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/utils.py index 19579c1a0f..33c613b749 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/utils.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/utils.py @@ -1,65 +1,141 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. 
-from __future__ import absolute_import, division, print_function import re +from typing import FrozenSet, NewType, Tuple, Union, cast -from ._typing import TYPE_CHECKING, cast +from .tags import Tag, parse_tag from .version import InvalidVersion, Version -if TYPE_CHECKING: # pragma: no cover - from typing import NewType, Union +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ - NormalizedName = NewType("NormalizedName", str) _canonicalize_regex = re.compile(r"[-_.]+") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") -def canonicalize_name(name): - # type: (str) -> NormalizedName +def canonicalize_name(name: str) -> NormalizedName: # This is taken from PEP 503. value = _canonicalize_regex.sub("-", name).lower() - return cast("NormalizedName", value) + return cast(NormalizedName, value) -def canonicalize_version(_version): - # type: (str) -> Union[Version, str] +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: """ This is very similar to Version.__str__, but has one subtle difference with the way it handles the release segment. 
""" - - try: - version = Version(_version) - except InvalidVersion: - # Legacy versions cannot be normalized - return _version + if isinstance(version, str): + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + else: + parsed = version parts = [] # Epoch - if version.epoch != 0: - parts.append("{0}!".format(version.epoch)) + if parsed.epoch != 0: + parts.append(f"{parsed.epoch}!") # Release segment - # NB: This strips trailing '.0's to normalize - parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in version.release))) + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) # Pre-release - if version.pre is not None: - parts.append("".join(str(x) for x in version.pre)) + if parsed.pre is not None: + parts.append("".join(str(x) for x in parsed.pre)) # Post-release - if version.post is not None: - parts.append(".post{0}".format(version.post)) + if parsed.post is not None: + parts.append(f".post{parsed.post}") # Development release - if version.dev is not None: - parts.append(".dev{0}".format(version.dev)) + if parsed.dev is not None: + parts.append(f".dev{parsed.dev}") # Local version segment - if version.local is not None: - parts.append("+{0}".format(version.local)) + if parsed.local is not None: + parts.append(f"+{parsed.local}") return "".join(parts) + + +def parse_wheel_filename( + filename: str, +) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename}" + ) + + parts = filename.split("-", dashes - 2) + 
name_part = parts[0] + # See PEP 427 for the rules on escaping the project name + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename}") + name = canonicalize_name(name_part) + version = Version(parts[1]) + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in '{filename}'" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") + + name = canonicalize_name(name_part) + version = Version(version_part) + return (name, version) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/packaging/version.py b/third_party/python/setuptools/pkg_resources/_vendor/packaging/version.py index 00371e86a8..b30e8cbf84 100644 --- a/third_party/python/setuptools/pkg_resources/_vendor/packaging/version.py +++ b/third_party/python/setuptools/pkg_resources/_vendor/packaging/version.py @@ -1,238 +1,116 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. 
-from __future__ import absolute_import, division, print_function +""" +.. testsetup:: + + from packaging.version import parse, Version +""" import collections import itertools import re - -from ._structures import Infinity, NegativeInfinity -from ._typing import TYPE_CHECKING - -if TYPE_CHECKING: # pragma: no cover - from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union - - from ._structures import InfinityType, NegativeInfinityType - - InfiniteTypes = Union[InfinityType, NegativeInfinityType] - PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] - SubLocalType = Union[InfiniteTypes, int, str] - LocalType = Union[ - NegativeInfinityType, - Tuple[ - Union[ - SubLocalType, - Tuple[SubLocalType, str], - Tuple[NegativeInfinityType, SubLocalType], - ], - ..., +from typing import Any, Callable, Optional, SupportsInt, Tuple, Union + +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType + +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] + +InfiniteTypes = Union[InfinityType, NegativeInfinityType] +PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] +SubLocalType = Union[InfiniteTypes, int, str] +LocalType = Union[ + NegativeInfinityType, + Tuple[ + Union[ + SubLocalType, + Tuple[SubLocalType, str], + Tuple[NegativeInfinityType, SubLocalType], ], - ] - CmpKey = Tuple[ - int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType - ] - LegacyCmpKey = Tuple[int, Tuple[str, ...]] - VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool - ] - -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] - + ..., + ], +] +CmpKey = Tuple[ + int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType +] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] _Version = collections.namedtuple( "_Version", ["epoch", "release", "dev", "pre", "post", "local"] ) -def parse(version): - 
# type: (str) -> Union[LegacyVersion, Version] - """ - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. - """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) +def parse(version: str) -> "Version": + """Parse the given version string. + >>> parse('1.0.dev1') + <Version('1.0.dev1')> -class InvalidVersion(ValueError): + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. """ - An invalid version was found, users should refer to PEP 440. - """ - - -class _BaseVersion(object): - _key = None # type: Union[CmpKey, LegacyCmpKey] - - def __hash__(self): - # type: () -> int - return hash(self._key) + return Version(version) - def __lt__(self, other): - # type: (_BaseVersion) -> bool - return self._compare(other, lambda s, o: s < o) - def __le__(self, other): - # type: (_BaseVersion) -> bool - return self._compare(other, lambda s, o: s <= o) +class InvalidVersion(ValueError): + """Raised when a version string is not a valid version. - def __eq__(self, other): - # type: (object) -> bool - return self._compare(other, lambda s, o: s == o) + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' + """ - def __ge__(self, other): - # type: (_BaseVersion) -> bool - return self._compare(other, lambda s, o: s >= o) - def __gt__(self, other): - # type: (_BaseVersion) -> bool - return self._compare(other, lambda s, o: s > o) +class _BaseVersion: + _key: Tuple[Any, ...] 
- def __ne__(self, other): - # type: (object) -> bool - return self._compare(other, lambda s, o: s != o) + def __hash__(self) -> int: + return hash(self._key) - def _compare(self, other, method): - # type: (object, VersionComparisonMethod) -> Union[bool, NotImplemented] + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: "_BaseVersion") -> bool: if not isinstance(other, _BaseVersion): return NotImplemented - return method(self._key, other._key) - - -class LegacyVersion(_BaseVersion): - def __init__(self, version): - # type: (str) -> None - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - def __str__(self): - # type: () -> str - return self._version - - def __repr__(self): - # type: () -> str - return "<LegacyVersion({0})>".format(repr(str(self))) - - @property - def public(self): - # type: () -> str - return self._version - - @property - def base_version(self): - # type: () -> str - return self._version - - @property - def epoch(self): - # type: () -> int - return -1 - - @property - def release(self): - # type: () -> None - return None - - @property - def pre(self): - # type: () -> None - return None - - @property - def post(self): - # type: () -> None - return None - - @property - def dev(self): - # type: () -> None - return None + return self._key < other._key - @property - def local(self): - # type: () -> None - return None - - @property - def is_prerelease(self): - # type: () -> bool - return False - - @property - def is_postrelease(self): - # type: () -> bool - return False - - @property - def is_devrelease(self): - # type: () -> bool - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s): - # type: (str) -> 
Iterator[str] - for part in _legacy_version_component_re.split(s): - part = _legacy_version_replacement_map.get(part, part) - - if not part or part == ".": - continue + def __le__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented - if part[:1] in "0123456789": - # pad for numeric comparison - yield part.zfill(8) - else: - yield "*" + part + return self._key <= other._key - # ensure that alpha/beta/candidate are before final - yield "*final" + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + return self._key == other._key -def _legacy_cmpkey(version): - # type: (str) -> LegacyCmpKey + def __ge__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 + return self._key >= other._key - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. 
- parts = [] # type: List[str] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() + def __gt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() + return self._key > other._key - parts.append(part) + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented - return epoch, tuple(parts) + return self._key != other._key # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P<epoch>[0-9]+)!)? # epoch @@ -263,18 +141,61 @@ VERSION_PATTERN = r""" (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version """ +VERSION_PATTERN = _VERSION_PATTERN +""" +A string containing the regular expression used to match a valid version. + +The pattern is not anchored at either end, and is intended for embedding in larger +expressions (for example, matching a version number as part of a file name). The +regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE`` +flags set. + +:meta hide-value: +""" + class Version(_BaseVersion): + """This class abstracts handling of a project's versions. + + A :class:`Version` instance is comparison aware and can be compared and + sorted using the standard Python interfaces. 
+ + >>> v1 = Version("1.0a5") + >>> v2 = Version("1.0") + >>> v1 + <Version('1.0a5')> + >>> v2 + <Version('1.0')> + >>> v1 < v2 + True + >>> v1 == v2 + False + >>> v1 > v2 + False + >>> v1 >= v2 + False + >>> v1 <= v2 + True + """ _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) + _key: CmpKey - def __init__(self, version): - # type: (str) -> None + def __init__(self, version: str) -> None: + """Initialize a Version object. + + :param version: + The string representation of a version which will be parsed and normalized + before use. + :raises InvalidVersion: + If the ``version`` does not conform to PEP 440 in any way then this + exception will be raised. + """ # Validate the version and parse it into pieces match = self._regex.search(version) if not match: - raise InvalidVersion("Invalid version: '{0}'".format(version)) + raise InvalidVersion(f"Invalid version: '{version}'") # Store the parsed out pieces of the version self._version = _Version( @@ -298,17 +219,25 @@ class Version(_BaseVersion): self._version.local, ) - def __repr__(self): - # type: () -> str - return "<Version({0})>".format(repr(str(self))) + def __repr__(self) -> str: + """A representation of the Version that shows all internal state. + + >>> Version('1.0.0') + <Version('1.0.0')> + """ + return f"<Version('{self}')>" - def __str__(self): - # type: () -> str + def __str__(self) -> str: + """A string representation of the version that can be rounded-tripped. 
+ + >>> str(Version("1.0a5")) + '1.0a5' + """ parts = [] # Epoch if self.epoch != 0: - parts.append("{0}!".format(self.epoch)) + parts.append(f"{self.epoch}!") # Release segment parts.append(".".join(str(x) for x in self.release)) @@ -319,67 +248,131 @@ class Version(_BaseVersion): # Post-release if self.post is not None: - parts.append(".post{0}".format(self.post)) + parts.append(f".post{self.post}") # Development release if self.dev is not None: - parts.append(".dev{0}".format(self.dev)) + parts.append(f".dev{self.dev}") # Local version segment if self.local is not None: - parts.append("+{0}".format(self.local)) + parts.append(f"+{self.local}") return "".join(parts) @property - def epoch(self): - # type: () -> int - _epoch = self._version.epoch # type: int + def epoch(self) -> int: + """The epoch of the version. + + >>> Version("2.0.0").epoch + 0 + >>> Version("1!2.0.0").epoch + 1 + """ + _epoch: int = self._version.epoch return _epoch @property - def release(self): - # type: () -> Tuple[int, ...] - _release = self._version.release # type: Tuple[int, ...] + def release(self) -> Tuple[int, ...]: + """The components of the "release" segment of the version. + + >>> Version("1.2.3").release + (1, 2, 3) + >>> Version("2.0.0").release + (2, 0, 0) + >>> Version("1!2.0.0.post0").release + (2, 0, 0) + + Includes trailing zeroes but not the epoch or any pre-release / development / + post-release suffixes. + """ + _release: Tuple[int, ...] = self._version.release return _release @property - def pre(self): - # type: () -> Optional[Tuple[str, int]] - _pre = self._version.pre # type: Optional[Tuple[str, int]] + def pre(self) -> Optional[Tuple[str, int]]: + """The pre-release segment of the version. 
+ + >>> print(Version("1.2.3").pre) + None + >>> Version("1.2.3a1").pre + ('a', 1) + >>> Version("1.2.3b1").pre + ('b', 1) + >>> Version("1.2.3rc1").pre + ('rc', 1) + """ + _pre: Optional[Tuple[str, int]] = self._version.pre return _pre @property - def post(self): - # type: () -> Optional[Tuple[str, int]] + def post(self) -> Optional[int]: + """The post-release number of the version. + + >>> print(Version("1.2.3").post) + None + >>> Version("1.2.3.post1").post + 1 + """ return self._version.post[1] if self._version.post else None @property - def dev(self): - # type: () -> Optional[Tuple[str, int]] + def dev(self) -> Optional[int]: + """The development number of the version. + + >>> print(Version("1.2.3").dev) + None + >>> Version("1.2.3.dev1").dev + 1 + """ return self._version.dev[1] if self._version.dev else None @property - def local(self): - # type: () -> Optional[str] + def local(self) -> Optional[str]: + """The local version segment of the version. + + >>> print(Version("1.2.3").local) + None + >>> Version("1.2.3+abc").local + 'abc' + """ if self._version.local: return ".".join(str(x) for x in self._version.local) else: return None @property - def public(self): - # type: () -> str + def public(self) -> str: + """The public portion of the version. + + >>> Version("1.2.3").public + '1.2.3' + >>> Version("1.2.3+abc").public + '1.2.3' + >>> Version("1.2.3+abc.dev1").public + '1.2.3' + """ return str(self).split("+", 1)[0] @property - def base_version(self): - # type: () -> str + def base_version(self) -> str: + """The "base version" of the version. + + >>> Version("1.2.3").base_version + '1.2.3' + >>> Version("1.2.3+abc").base_version + '1.2.3' + >>> Version("1!1.2.3+abc.dev1").base_version + '1!1.2.3' + + The "base version" is the public version of the project without any pre or post + release markers. 
+ """ parts = [] # Epoch if self.epoch != 0: - parts.append("{0}!".format(self.epoch)) + parts.append(f"{self.epoch}!") # Release segment parts.append(".".join(str(x) for x in self.release)) @@ -387,41 +380,79 @@ class Version(_BaseVersion): return "".join(parts) @property - def is_prerelease(self): - # type: () -> bool + def is_prerelease(self) -> bool: + """Whether this version is a pre-release. + + >>> Version("1.2.3").is_prerelease + False + >>> Version("1.2.3a1").is_prerelease + True + >>> Version("1.2.3b1").is_prerelease + True + >>> Version("1.2.3rc1").is_prerelease + True + >>> Version("1.2.3dev1").is_prerelease + True + """ return self.dev is not None or self.pre is not None @property - def is_postrelease(self): - # type: () -> bool + def is_postrelease(self) -> bool: + """Whether this version is a post-release. + + >>> Version("1.2.3").is_postrelease + False + >>> Version("1.2.3.post1").is_postrelease + True + """ return self.post is not None @property - def is_devrelease(self): - # type: () -> bool + def is_devrelease(self) -> bool: + """Whether this version is a development release. + + >>> Version("1.2.3").is_devrelease + False + >>> Version("1.2.3.dev1").is_devrelease + True + """ return self.dev is not None @property - def major(self): - # type: () -> int + def major(self) -> int: + """The first item of :attr:`release` or ``0`` if unavailable. + + >>> Version("1.2.3").major + 1 + """ return self.release[0] if len(self.release) >= 1 else 0 @property - def minor(self): - # type: () -> int + def minor(self) -> int: + """The second item of :attr:`release` or ``0`` if unavailable. + + >>> Version("1.2.3").minor + 2 + >>> Version("1").minor + 0 + """ return self.release[1] if len(self.release) >= 2 else 0 @property - def micro(self): - # type: () -> int + def micro(self) -> int: + """The third item of :attr:`release` or ``0`` if unavailable. 
+ + >>> Version("1.2.3").micro + 3 + >>> Version("1").micro + 0 + """ return self.release[2] if len(self.release) >= 3 else 0 def _parse_letter_version( - letter, # type: str - number, # type: Union[str, bytes, SupportsInt] -): - # type: (...) -> Optional[Tuple[str, int]] + letter: str, number: Union[str, bytes, SupportsInt] +) -> Optional[Tuple[str, int]]: if letter: # We consider there to be an implicit 0 in a pre-release if there is @@ -458,8 +489,7 @@ def _parse_letter_version( _local_version_separators = re.compile(r"[\._-]") -def _parse_local_version(local): - # type: (str) -> Optional[LocalType] +def _parse_local_version(local: str) -> Optional[LocalType]: """ Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve"). """ @@ -472,14 +502,13 @@ def _parse_local_version(local): def _cmpkey( - epoch, # type: int - release, # type: Tuple[int, ...] - pre, # type: Optional[Tuple[str, int]] - post, # type: Optional[Tuple[str, int]] - dev, # type: Optional[Tuple[str, int]] - local, # type: Optional[Tuple[SubLocalType]] -): - # type: (...) -> CmpKey + epoch: int, + release: Tuple[int, ...], + pre: Optional[Tuple[str, int]], + post: Optional[Tuple[str, int]], + dev: Optional[Tuple[str, int]], + local: Optional[Tuple[SubLocalType]], +) -> CmpKey: # When we compare a release version, we want to compare it with all of the # trailing zeros removed. So we'll use a reverse the list, drop all the now @@ -495,7 +524,7 @@ def _cmpkey( # if there is not a pre or a post segment. If we have one of those then # the normal sorting rules will handle this case correctly. if pre is None and post is None and dev is not None: - _pre = NegativeInfinity # type: PrePostDevType + _pre: PrePostDevType = NegativeInfinity # Versions without a pre-release (except as noted above) should sort after # those with one. elif pre is None: @@ -505,21 +534,21 @@ def _cmpkey( # Versions without a post segment should sort before those with one. 
if post is None: - _post = NegativeInfinity # type: PrePostDevType + _post: PrePostDevType = NegativeInfinity else: _post = post # Versions without a development segment should sort after those with one. if dev is None: - _dev = Infinity # type: PrePostDevType + _dev: PrePostDevType = Infinity else: _dev = dev if local is None: # Versions without a local segment should sort before those with one. - _local = NegativeInfinity # type: LocalType + _local: LocalType = NegativeInfinity else: # Versions with a local segment need that segment parsed to implement # the sorting rules in PEP440. diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/__init__.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/__init__.py new file mode 100644 index 0000000000..aef2821b83 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/__init__.py @@ -0,0 +1,342 @@ +""" +Utilities for determining application-specific dirs. See <https://github.com/platformdirs/platformdirs> for details and +usage. 
+""" +from __future__ import annotations + +import os +import sys +from pathlib import Path + +if sys.version_info >= (3, 8): # pragma: no cover (py38+) + from typing import Literal +else: # pragma: no cover (py38+) + from ..typing_extensions import Literal + +from .api import PlatformDirsABC +from .version import __version__ +from .version import __version_tuple__ as __version_info__ + + +def _set_platform_dir_class() -> type[PlatformDirsABC]: + if sys.platform == "win32": + from .windows import Windows as Result + elif sys.platform == "darwin": + from .macos import MacOS as Result + else: + from .unix import Unix as Result + + if os.getenv("ANDROID_DATA") == "/data" and os.getenv("ANDROID_ROOT") == "/system": + + if os.getenv("SHELL") or os.getenv("PREFIX"): + return Result + + from .android import _android_folder + + if _android_folder() is not None: + from .android import Android + + return Android # return to avoid redefinition of result + + return Result + + +PlatformDirs = _set_platform_dir_class() #: Currently active platform +AppDirs = PlatformDirs #: Backwards compatibility with appdirs + + +def user_data_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.version>`. 
+ :returns: data directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_data_dir + + +def site_data_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + multipath: bool = False, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param multipath: See `roaming <platformdirs.api.PlatformDirsABC.multipath>`. + :returns: data directory shared by users + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_data_dir + + +def user_config_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.version>`. + :returns: config directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_config_dir + + +def site_config_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + multipath: bool = False, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param multipath: See `roaming <platformdirs.api.PlatformDirsABC.multipath>`. 
+ :returns: config directory shared by the users + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_config_dir + + +def user_cache_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + opinion: bool = True, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param opinion: See `roaming <platformdirs.api.PlatformDirsABC.opinion>`. + :returns: cache directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_cache_dir + + +def user_state_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.version>`. + :returns: state directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_state_dir + + +def user_log_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + opinion: bool = True, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param opinion: See `roaming <platformdirs.api.PlatformDirsABC.opinion>`. 
+ :returns: log directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_log_dir + + +def user_documents_dir() -> str: + """ + :returns: documents directory tied to the user + """ + return PlatformDirs().user_documents_dir + + +def user_runtime_dir( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + opinion: bool = True, +) -> str: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`. + :returns: runtime directory tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_runtime_dir + + +def user_data_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.version>`. + :returns: data path tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_data_path + + +def site_data_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + multipath: bool = False, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. 
+ :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param multipath: See `multipath <platformdirs.api.PlatformDirsABC.multipath>`. + :returns: data path shared by users + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_data_path + + +def user_config_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.version>`. + :returns: config path tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_config_path + + +def site_config_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + multipath: bool = False, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param multipath: See `roaming <platformdirs.api.PlatformDirsABC.multipath>`. + :returns: config path shared by the users + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, multipath=multipath).site_config_path + + +def user_cache_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + opinion: bool = True, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. 
+ :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param opinion: See `roaming <platformdirs.api.PlatformDirsABC.opinion>`. + :returns: cache path tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_cache_path + + +def user_state_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param roaming: See `roaming <platformdirs.api.PlatformDirsABC.version>`. + :returns: state path tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, roaming=roaming).user_state_path + + +def user_log_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + opinion: bool = True, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. + :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param opinion: See `roaming <platformdirs.api.PlatformDirsABC.opinion>`. + :returns: log path tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_log_path + + +def user_documents_path() -> Path: + """ + :returns: documents path tied to the user + """ + return PlatformDirs().user_documents_path + + +def user_runtime_path( + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + opinion: bool = True, +) -> Path: + """ + :param appname: See `appname <platformdirs.api.PlatformDirsABC.appname>`. 
+ :param appauthor: See `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`. + :param version: See `version <platformdirs.api.PlatformDirsABC.version>`. + :param opinion: See `opinion <platformdirs.api.PlatformDirsABC.opinion>`. + :returns: runtime path tied to the user + """ + return PlatformDirs(appname=appname, appauthor=appauthor, version=version, opinion=opinion).user_runtime_path + + +__all__ = [ + "__version__", + "__version_info__", + "PlatformDirs", + "AppDirs", + "PlatformDirsABC", + "user_data_dir", + "user_config_dir", + "user_cache_dir", + "user_state_dir", + "user_log_dir", + "user_documents_dir", + "user_runtime_dir", + "site_data_dir", + "site_config_dir", + "user_data_path", + "user_config_path", + "user_cache_path", + "user_state_path", + "user_log_path", + "user_documents_path", + "user_runtime_path", + "site_data_path", + "site_config_path", +] diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/__main__.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/__main__.py new file mode 100644 index 0000000000..0fc1edd59c --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/__main__.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from platformdirs import PlatformDirs, __version__ + +PROPS = ( + "user_data_dir", + "user_config_dir", + "user_cache_dir", + "user_state_dir", + "user_log_dir", + "user_documents_dir", + "user_runtime_dir", + "site_data_dir", + "site_config_dir", +) + + +def main() -> None: + app_name = "MyApp" + app_author = "MyCompany" + + print(f"-- platformdirs {__version__} --") + + print("-- app dirs (with optional 'version')") + dirs = PlatformDirs(app_name, app_author, version="1.0") + for prop in PROPS: + print(f"{prop}: {getattr(dirs, prop)}") + + print("\n-- app dirs (without optional 'version')") + dirs = PlatformDirs(app_name, app_author) + for prop in PROPS: + print(f"{prop}: {getattr(dirs, prop)}") + + print("\n-- app dirs (without 
optional 'appauthor')") + dirs = PlatformDirs(app_name) + for prop in PROPS: + print(f"{prop}: {getattr(dirs, prop)}") + + print("\n-- app dirs (with disabled 'appauthor')") + dirs = PlatformDirs(app_name, appauthor=False) + for prop in PROPS: + print(f"{prop}: {getattr(dirs, prop)}") + + +if __name__ == "__main__": + main() diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/android.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/android.py new file mode 100644 index 0000000000..eda8093512 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/android.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +import os +import re +import sys +from functools import lru_cache +from typing import cast + +from .api import PlatformDirsABC + + +class Android(PlatformDirsABC): + """ + Follows the guidance `from here <https://android.stackexchange.com/a/216132>`_. Makes use of the + `appname <platformdirs.api.PlatformDirsABC.appname>` and + `version <platformdirs.api.PlatformDirsABC.version>`. + """ + + @property + def user_data_dir(self) -> str: + """:return: data directory tied to the user, e.g. ``/data/user/<userid>/<packagename>/files/<AppName>``""" + return self._append_app_name_and_version(cast(str, _android_folder()), "files") + + @property + def site_data_dir(self) -> str: + """:return: data directory shared by users, same as `user_data_dir`""" + return self.user_data_dir + + @property + def user_config_dir(self) -> str: + """ + :return: config directory tied to the user, e.g. 
``/data/user/<userid>/<packagename>/shared_prefs/<AppName>`` + """ + return self._append_app_name_and_version(cast(str, _android_folder()), "shared_prefs") + + @property + def site_config_dir(self) -> str: + """:return: config directory shared by the users, same as `user_config_dir`""" + return self.user_config_dir + + @property + def user_cache_dir(self) -> str: + """:return: cache directory tied to the user, e.g. e.g. ``/data/user/<userid>/<packagename>/cache/<AppName>``""" + return self._append_app_name_and_version(cast(str, _android_folder()), "cache") + + @property + def user_state_dir(self) -> str: + """:return: state directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def user_log_dir(self) -> str: + """ + :return: log directory tied to the user, same as `user_cache_dir` if not opinionated else ``log`` in it, + e.g. ``/data/user/<userid>/<packagename>/cache/<AppName>/log`` + """ + path = self.user_cache_dir + if self.opinion: + path = os.path.join(path, "log") + return path + + @property + def user_documents_dir(self) -> str: + """ + :return: documents directory tied to the user e.g. ``/storage/emulated/0/Documents`` + """ + return _android_documents_folder() + + @property + def user_runtime_dir(self) -> str: + """ + :return: runtime directory tied to the user, same as `user_cache_dir` if not opinionated else ``tmp`` in it, + e.g. 
``/data/user/<userid>/<packagename>/cache/<AppName>/tmp`` + """ + path = self.user_cache_dir + if self.opinion: + path = os.path.join(path, "tmp") + return path + + +@lru_cache(maxsize=1) +def _android_folder() -> str | None: + """:return: base folder for the Android OS or None if cannot be found""" + try: + # First try to get path to android app via pyjnius + from jnius import autoclass + + Context = autoclass("android.content.Context") # noqa: N806 + result: str | None = Context.getFilesDir().getParentFile().getAbsolutePath() + except Exception: + # if fails find an android folder looking path on the sys.path + pattern = re.compile(r"/data/(data|user/\d+)/(.+)/files") + for path in sys.path: + if pattern.match(path): + result = path.split("/files")[0] + break + else: + result = None + return result + + +@lru_cache(maxsize=1) +def _android_documents_folder() -> str: + """:return: documents folder for the Android OS""" + # Get directories with pyjnius + try: + from jnius import autoclass + + Context = autoclass("android.content.Context") # noqa: N806 + Environment = autoclass("android.os.Environment") # noqa: N806 + documents_dir: str = Context.getExternalFilesDir(Environment.DIRECTORY_DOCUMENTS).getAbsolutePath() + except Exception: + documents_dir = "/storage/emulated/0/Documents" + + return documents_dir + + +__all__ = [ + "Android", +] diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/api.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/api.py new file mode 100644 index 0000000000..6f6e2c2c69 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/api.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import os +import sys +from abc import ABC, abstractmethod +from pathlib import Path + +if sys.version_info >= (3, 8): # pragma: no branch + from typing import Literal # pragma: no cover + + +class PlatformDirsABC(ABC): + """ + Abstract base class for platform directories. 
+ """ + + def __init__( + self, + appname: str | None = None, + appauthor: str | None | Literal[False] = None, + version: str | None = None, + roaming: bool = False, + multipath: bool = False, + opinion: bool = True, + ): + """ + Create a new platform directory. + + :param appname: See `appname`. + :param appauthor: See `appauthor`. + :param version: See `version`. + :param roaming: See `roaming`. + :param multipath: See `multipath`. + :param opinion: See `opinion`. + """ + self.appname = appname #: The name of application. + self.appauthor = appauthor + """ + The name of the app author or distributing body for this application. Typically, it is the owning company name. + Defaults to `appname`. You may pass ``False`` to disable it. + """ + self.version = version + """ + An optional version path element to append to the path. You might want to use this if you want multiple versions + of your app to be able to run independently. If used, this would typically be ``<major>.<minor>``. + """ + self.roaming = roaming + """ + Whether to use the roaming appdata directory on Windows. That means that for users on a Windows network setup + for roaming profiles, this user data will be synced on login (see + `here <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>`_). + """ + self.multipath = multipath + """ + An optional parameter only applicable to Unix/Linux which indicates that the entire list of data dirs should be + returned. By default, the first item would only be returned. + """ + self.opinion = opinion #: A flag to indicating to use opinionated values. 
+ + def _append_app_name_and_version(self, *base: str) -> str: + params = list(base[1:]) + if self.appname: + params.append(self.appname) + if self.version: + params.append(self.version) + return os.path.join(base[0], *params) + + @property + @abstractmethod + def user_data_dir(self) -> str: + """:return: data directory tied to the user""" + + @property + @abstractmethod + def site_data_dir(self) -> str: + """:return: data directory shared by users""" + + @property + @abstractmethod + def user_config_dir(self) -> str: + """:return: config directory tied to the user""" + + @property + @abstractmethod + def site_config_dir(self) -> str: + """:return: config directory shared by the users""" + + @property + @abstractmethod + def user_cache_dir(self) -> str: + """:return: cache directory tied to the user""" + + @property + @abstractmethod + def user_state_dir(self) -> str: + """:return: state directory tied to the user""" + + @property + @abstractmethod + def user_log_dir(self) -> str: + """:return: log directory tied to the user""" + + @property + @abstractmethod + def user_documents_dir(self) -> str: + """:return: documents directory tied to the user""" + + @property + @abstractmethod + def user_runtime_dir(self) -> str: + """:return: runtime directory tied to the user""" + + @property + def user_data_path(self) -> Path: + """:return: data path tied to the user""" + return Path(self.user_data_dir) + + @property + def site_data_path(self) -> Path: + """:return: data path shared by users""" + return Path(self.site_data_dir) + + @property + def user_config_path(self) -> Path: + """:return: config path tied to the user""" + return Path(self.user_config_dir) + + @property + def site_config_path(self) -> Path: + """:return: config path shared by the users""" + return Path(self.site_config_dir) + + @property + def user_cache_path(self) -> Path: + """:return: cache path tied to the user""" + return Path(self.user_cache_dir) + + @property + def user_state_path(self) -> Path: + 
""":return: state path tied to the user""" + return Path(self.user_state_dir) + + @property + def user_log_path(self) -> Path: + """:return: log path tied to the user""" + return Path(self.user_log_dir) + + @property + def user_documents_path(self) -> Path: + """:return: documents path tied to the user""" + return Path(self.user_documents_dir) + + @property + def user_runtime_path(self) -> Path: + """:return: runtime path tied to the user""" + return Path(self.user_runtime_dir) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/macos.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/macos.py new file mode 100644 index 0000000000..a01337c776 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/macos.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import os + +from .api import PlatformDirsABC + + +class MacOS(PlatformDirsABC): + """ + Platform directories for the macOS operating system. Follows the guidance from `Apple documentation + <https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/MacOSXDirectories/MacOSXDirectories.html>`_. + Makes use of the `appname <platformdirs.api.PlatformDirsABC.appname>` and + `version <platformdirs.api.PlatformDirsABC.version>`. + """ + + @property + def user_data_dir(self) -> str: + """:return: data directory tied to the user, e.g. ``~/Library/Application Support/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Application Support/")) + + @property + def site_data_dir(self) -> str: + """:return: data directory shared by users, e.g. ``/Library/Application Support/$appname/$version``""" + return self._append_app_name_and_version("/Library/Application Support") + + @property + def user_config_dir(self) -> str: + """:return: config directory tied to the user, e.g. 
``~/Library/Preferences/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Preferences/")) + + @property + def site_config_dir(self) -> str: + """:return: config directory shared by the users, e.g. ``/Library/Preferences/$appname``""" + return self._append_app_name_and_version("/Library/Preferences") + + @property + def user_cache_dir(self) -> str: + """:return: cache directory tied to the user, e.g. ``~/Library/Caches/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Caches")) + + @property + def user_state_dir(self) -> str: + """:return: state directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def user_log_dir(self) -> str: + """:return: log directory tied to the user, e.g. ``~/Library/Logs/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Logs")) + + @property + def user_documents_dir(self) -> str: + """:return: documents directory tied to the user, e.g. ``~/Documents``""" + return os.path.expanduser("~/Documents") + + @property + def user_runtime_dir(self) -> str: + """:return: runtime directory tied to the user, e.g. 
``~/Library/Caches/TemporaryItems/$appname/$version``""" + return self._append_app_name_and_version(os.path.expanduser("~/Library/Caches/TemporaryItems")) + + +__all__ = [ + "MacOS", +] diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/unix.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/unix.py new file mode 100644 index 0000000000..9aca5a0305 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/unix.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import os +import sys +from configparser import ConfigParser +from pathlib import Path + +from .api import PlatformDirsABC + +if sys.platform.startswith("linux"): # pragma: no branch # no op check, only to please the type checker + from os import getuid +else: + + def getuid() -> int: + raise RuntimeError("should only be used on Linux") + + +class Unix(PlatformDirsABC): + """ + On Unix/Linux, we follow the + `XDG Basedir Spec <https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html>`_. The spec allows + overriding directories with environment variables. The examples show are the default values, alongside the name of + the environment variable that overrides them. Makes use of the + `appname <platformdirs.api.PlatformDirsABC.appname>`, + `version <platformdirs.api.PlatformDirsABC.version>`, + `multipath <platformdirs.api.PlatformDirsABC.multipath>`, + `opinion <platformdirs.api.PlatformDirsABC.opinion>`. + """ + + @property + def user_data_dir(self) -> str: + """ + :return: data directory tied to the user, e.g. 
``~/.local/share/$appname/$version`` or + ``$XDG_DATA_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_DATA_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.local/share") + return self._append_app_name_and_version(path) + + @property + def site_data_dir(self) -> str: + """ + :return: data directories shared by users (if `multipath <platformdirs.api.PlatformDirsABC.multipath>` is + enabled and ``XDG_DATA_DIR`` is set and a multi path the response is also a multi path separated by the OS + path separator), e.g. ``/usr/local/share/$appname/$version`` or ``/usr/share/$appname/$version`` + """ + # XDG default for $XDG_DATA_DIRS; only first, if multipath is False + path = os.environ.get("XDG_DATA_DIRS", "") + if not path.strip(): + path = f"/usr/local/share{os.pathsep}/usr/share" + return self._with_multi_path(path) + + def _with_multi_path(self, path: str) -> str: + path_list = path.split(os.pathsep) + if not self.multipath: + path_list = path_list[0:1] + path_list = [self._append_app_name_and_version(os.path.expanduser(p)) for p in path_list] + return os.pathsep.join(path_list) + + @property + def user_config_dir(self) -> str: + """ + :return: config directory tied to the user, e.g. ``~/.config/$appname/$version`` or + ``$XDG_CONFIG_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_CONFIG_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.config") + return self._append_app_name_and_version(path) + + @property + def site_config_dir(self) -> str: + """ + :return: config directories shared by users (if `multipath <platformdirs.api.PlatformDirsABC.multipath>` + is enabled and ``XDG_DATA_DIR`` is set and a multi path the response is also a multi path separated by the OS + path separator), e.g. 
``/etc/xdg/$appname/$version`` + """ + # XDG default for $XDG_CONFIG_DIRS only first, if multipath is False + path = os.environ.get("XDG_CONFIG_DIRS", "") + if not path.strip(): + path = "/etc/xdg" + return self._with_multi_path(path) + + @property + def user_cache_dir(self) -> str: + """ + :return: cache directory tied to the user, e.g. ``~/.cache/$appname/$version`` or + ``~/$XDG_CACHE_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_CACHE_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.cache") + return self._append_app_name_and_version(path) + + @property + def user_state_dir(self) -> str: + """ + :return: state directory tied to the user, e.g. ``~/.local/state/$appname/$version`` or + ``$XDG_STATE_HOME/$appname/$version`` + """ + path = os.environ.get("XDG_STATE_HOME", "") + if not path.strip(): + path = os.path.expanduser("~/.local/state") + return self._append_app_name_and_version(path) + + @property + def user_log_dir(self) -> str: + """ + :return: log directory tied to the user, same as `user_state_dir` if not opinionated else ``log`` in it + """ + path = self.user_state_dir + if self.opinion: + path = os.path.join(path, "log") + return path + + @property + def user_documents_dir(self) -> str: + """ + :return: documents directory tied to the user, e.g. ``~/Documents`` + """ + documents_dir = _get_user_dirs_folder("XDG_DOCUMENTS_DIR") + if documents_dir is None: + documents_dir = os.environ.get("XDG_DOCUMENTS_DIR", "").strip() + if not documents_dir: + documents_dir = os.path.expanduser("~/Documents") + + return documents_dir + + @property + def user_runtime_dir(self) -> str: + """ + :return: runtime directory tied to the user, e.g. 
``/run/user/$(id -u)/$appname/$version`` or + ``$XDG_RUNTIME_DIR/$appname/$version`` + """ + path = os.environ.get("XDG_RUNTIME_DIR", "") + if not path.strip(): + path = f"/run/user/{getuid()}" + return self._append_app_name_and_version(path) + + @property + def site_data_path(self) -> Path: + """:return: data path shared by users. Only return first item, even if ``multipath`` is set to ``True``""" + return self._first_item_as_path_if_multipath(self.site_data_dir) + + @property + def site_config_path(self) -> Path: + """:return: config path shared by the users. Only return first item, even if ``multipath`` is set to ``True``""" + return self._first_item_as_path_if_multipath(self.site_config_dir) + + def _first_item_as_path_if_multipath(self, directory: str) -> Path: + if self.multipath: + # If multipath is True, the first path is returned. + directory = directory.split(os.pathsep)[0] + return Path(directory) + + +def _get_user_dirs_folder(key: str) -> str | None: + """Return directory from user-dirs.dirs config file. 
See https://freedesktop.org/wiki/Software/xdg-user-dirs/""" + user_dirs_config_path = os.path.join(Unix().user_config_dir, "user-dirs.dirs") + if os.path.exists(user_dirs_config_path): + parser = ConfigParser() + + with open(user_dirs_config_path) as stream: + # Add fake section header, so ConfigParser doesn't complain + parser.read_string(f"[top]\n{stream.read()}") + + if key not in parser["top"]: + return None + + path = parser["top"][key].strip('"') + # Handle relative home paths + path = path.replace("$HOME", os.path.expanduser("~")) + return path + + return None + + +__all__ = [ + "Unix", +] diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/version.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/version.py new file mode 100644 index 0000000000..9f6eb98e8f --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/version.py @@ -0,0 +1,4 @@ +# file generated by setuptools_scm +# don't change, don't track in version control +__version__ = version = '2.6.2' +__version_tuple__ = version_tuple = (2, 6, 2) diff --git a/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/windows.py b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/windows.py new file mode 100644 index 0000000000..d5c27b3414 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/platformdirs/windows.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +import ctypes +import os +import sys +from functools import lru_cache +from typing import Callable + +from .api import PlatformDirsABC + + +class Windows(PlatformDirsABC): + """`MSDN on where to store app data files + <http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120>`_. 
+ Makes use of the + `appname <platformdirs.api.PlatformDirsABC.appname>`, + `appauthor <platformdirs.api.PlatformDirsABC.appauthor>`, + `version <platformdirs.api.PlatformDirsABC.version>`, + `roaming <platformdirs.api.PlatformDirsABC.roaming>`, + `opinion <platformdirs.api.PlatformDirsABC.opinion>`.""" + + @property + def user_data_dir(self) -> str: + """ + :return: data directory tied to the user, e.g. + ``%USERPROFILE%\\AppData\\Local\\$appauthor\\$appname`` (not roaming) or + ``%USERPROFILE%\\AppData\\Roaming\\$appauthor\\$appname`` (roaming) + """ + const = "CSIDL_APPDATA" if self.roaming else "CSIDL_LOCAL_APPDATA" + path = os.path.normpath(get_win_folder(const)) + return self._append_parts(path) + + def _append_parts(self, path: str, *, opinion_value: str | None = None) -> str: + params = [] + if self.appname: + if self.appauthor is not False: + author = self.appauthor or self.appname + params.append(author) + params.append(self.appname) + if opinion_value is not None and self.opinion: + params.append(opinion_value) + if self.version: + params.append(self.version) + return os.path.join(path, *params) + + @property + def site_data_dir(self) -> str: + """:return: data directory shared by users, e.g. ``C:\\ProgramData\\$appauthor\\$appname``""" + path = os.path.normpath(get_win_folder("CSIDL_COMMON_APPDATA")) + return self._append_parts(path) + + @property + def user_config_dir(self) -> str: + """:return: config directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def site_config_dir(self) -> str: + """:return: config directory shared by the users, same as `site_data_dir`""" + return self.site_data_dir + + @property + def user_cache_dir(self) -> str: + """ + :return: cache directory tied to the user (if opinionated with ``Cache`` folder within ``$appname``) e.g. 
+ ``%USERPROFILE%\\AppData\\Local\\$appauthor\\$appname\\Cache\\$version`` + """ + path = os.path.normpath(get_win_folder("CSIDL_LOCAL_APPDATA")) + return self._append_parts(path, opinion_value="Cache") + + @property + def user_state_dir(self) -> str: + """:return: state directory tied to the user, same as `user_data_dir`""" + return self.user_data_dir + + @property + def user_log_dir(self) -> str: + """ + :return: log directory tied to the user, same as `user_data_dir` if not opinionated else ``Logs`` in it + """ + path = self.user_data_dir + if self.opinion: + path = os.path.join(path, "Logs") + return path + + @property + def user_documents_dir(self) -> str: + """ + :return: documents directory tied to the user e.g. ``%USERPROFILE%\\Documents`` + """ + return os.path.normpath(get_win_folder("CSIDL_PERSONAL")) + + @property + def user_runtime_dir(self) -> str: + """ + :return: runtime directory tied to the user, e.g. + ``%USERPROFILE%\\AppData\\Local\\Temp\\$appauthor\\$appname`` + """ + path = os.path.normpath(os.path.join(get_win_folder("CSIDL_LOCAL_APPDATA"), "Temp")) + return self._append_parts(path) + + +def get_win_folder_from_env_vars(csidl_name: str) -> str: + """Get folder from environment variables.""" + if csidl_name == "CSIDL_PERSONAL": # does not have an environment name + return os.path.join(os.path.normpath(os.environ["USERPROFILE"]), "Documents") + + env_var_name = { + "CSIDL_APPDATA": "APPDATA", + "CSIDL_COMMON_APPDATA": "ALLUSERSPROFILE", + "CSIDL_LOCAL_APPDATA": "LOCALAPPDATA", + }.get(csidl_name) + if env_var_name is None: + raise ValueError(f"Unknown CSIDL name: {csidl_name}") + result = os.environ.get(env_var_name) + if result is None: + raise ValueError(f"Unset environment variable: {env_var_name}") + return result + + +def get_win_folder_from_registry(csidl_name: str) -> str: + """Get folder from the registry. + + This is a fallback technique at best. 
I'm not sure if using the + registry for this guarantees us the correct answer for all CSIDL_* + names. + """ + shell_folder_name = { + "CSIDL_APPDATA": "AppData", + "CSIDL_COMMON_APPDATA": "Common AppData", + "CSIDL_LOCAL_APPDATA": "Local AppData", + "CSIDL_PERSONAL": "Personal", + }.get(csidl_name) + if shell_folder_name is None: + raise ValueError(f"Unknown CSIDL name: {csidl_name}") + if sys.platform != "win32": # only needed for mypy type checker to know that this code runs only on Windows + raise NotImplementedError + import winreg + + key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders") + directory, _ = winreg.QueryValueEx(key, shell_folder_name) + return str(directory) + + +def get_win_folder_via_ctypes(csidl_name: str) -> str: + """Get folder with ctypes.""" + csidl_const = { + "CSIDL_APPDATA": 26, + "CSIDL_COMMON_APPDATA": 35, + "CSIDL_LOCAL_APPDATA": 28, + "CSIDL_PERSONAL": 5, + }.get(csidl_name) + if csidl_const is None: + raise ValueError(f"Unknown CSIDL name: {csidl_name}") + + buf = ctypes.create_unicode_buffer(1024) + windll = getattr(ctypes, "windll") # noqa: B009 # using getattr to avoid false positive with mypy type checker + windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf) + + # Downgrade to short path name if it has highbit chars. 
+ if any(ord(c) > 255 for c in buf): + buf2 = ctypes.create_unicode_buffer(1024) + if windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024): + buf = buf2 + + return buf.value + + +def _pick_get_win_folder() -> Callable[[str], str]: + if hasattr(ctypes, "windll"): + return get_win_folder_via_ctypes + try: + import winreg # noqa: F401 + except ImportError: + return get_win_folder_from_env_vars + else: + return get_win_folder_from_registry + + +get_win_folder = lru_cache(maxsize=None)(_pick_get_win_folder()) + +__all__ = [ + "Windows", +] diff --git a/third_party/python/setuptools/pkg_resources/_vendor/pyparsing.py b/third_party/python/setuptools/pkg_resources/_vendor/pyparsing.py deleted file mode 100644 index cf75e1e5fc..0000000000 --- a/third_party/python/setuptools/pkg_resources/_vendor/pyparsing.py +++ /dev/null @@ -1,5742 +0,0 @@ -# module pyparsing.py
-#
-# Copyright (c) 2003-2018 Paul T. McGuire
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
-__doc__ = \
-"""
-pyparsing module - Classes and methods to define and execute parsing grammars
-=============================================================================
-
-The pyparsing module is an alternative approach to creating and executing simple grammars,
-vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
-don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
-provides a library of classes that you use to construct the grammar directly in Python.
-
-Here is a program to parse "Hello, World!" (or any greeting of the form
-C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
-(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
-L{Literal} expressions)::
-
- from pyparsing import Word, alphas
-
- # define grammar of a greeting
- greet = Word(alphas) + "," + Word(alphas) + "!"
-
- hello = "Hello, World!"
- print (hello, "->", greet.parseString(hello))
-
-The program outputs the following::
-
- Hello, World! -> ['Hello', ',', 'World', '!']
-
-The Python representation of the grammar is quite readable, owing to the self-explanatory
-class names, and the use of '+', '|' and '^' operators.
-
-The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
-object with named attributes.
-
-The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
- - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
- - quoted strings
- - embedded comments
-
-
-Getting Started -
------------------
-Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
-classes inherit from. Use the docstrings for examples of how to:
- - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
- - construct character word-group expressions using the L{Word} class
- - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
- - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
- - associate names with your parsed results using L{ParserElement.setResultsName}
- - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
- - find more useful common expressions in the L{pyparsing_common} namespace class
-"""
-
-__version__ = "2.2.1"
-__versionTime__ = "18 Sep 2018 00:49 UTC"
-__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
-
-import string
-from weakref import ref as wkref
-import copy
-import sys
-import warnings
-import re
-import sre_constants
-import collections
-import pprint
-import traceback
-import types
-from datetime import datetime
-
-try:
- from _thread import RLock
-except ImportError:
- from threading import RLock
-
-try:
- # Python 3
- from collections.abc import Iterable
- from collections.abc import MutableMapping
-except ImportError:
- # Python 2.7
- from collections import Iterable
- from collections import MutableMapping
-
-try:
- from collections import OrderedDict as _OrderedDict
-except ImportError:
- try:
- from ordereddict import OrderedDict as _OrderedDict
- except ImportError:
- _OrderedDict = None
-
-#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
-
-__all__ = [
-'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
-'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
-'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
-'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
-'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
-'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
-'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
-'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
-'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
-'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
-'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
-'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
-'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
-'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
-'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
-'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
-'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
-'CloseMatch', 'tokenMap', 'pyparsing_common',
-]
-
-system_version = tuple(sys.version_info)[:3]
-PY_3 = system_version[0] == 3
-if PY_3:
- _MAX_INT = sys.maxsize
- basestring = str
- unichr = chr
- _ustr = str
-
- # build list of single arg builtins, that can be used as parse actions
- singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
-
-else:
- _MAX_INT = sys.maxint
- range = xrange
-
- def _ustr(obj):
- """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
- str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
- then < returns the unicode object | encodes it with the default encoding | ... >.
- """
- if isinstance(obj,unicode):
- return obj
-
- try:
- # If this works, then _ustr(obj) has the same behaviour as str(obj), so
- # it won't break any existing code.
- return str(obj)
-
- except UnicodeEncodeError:
- # Else encode it
- ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
- xmlcharref = Regex(r'&#\d+;')
- xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
- return xmlcharref.transformString(ret)
-
- # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
- singleArgBuiltins = []
- import __builtin__
- for fname in "sum len sorted reversed list tuple set any all min max".split():
- try:
- singleArgBuiltins.append(getattr(__builtin__,fname))
- except AttributeError:
- continue
-
-_generatorType = type((y for y in range(1)))
-
-def _xml_escape(data):
- """Escape &, <, >, ", ', etc. in a string of data."""
-
- # ampersand must be replaced first
- from_symbols = '&><"\''
- to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
- for from_,to_ in zip(from_symbols, to_symbols):
- data = data.replace(from_, to_)
- return data
-
-class _Constants(object):
- pass
-
-alphas = string.ascii_uppercase + string.ascii_lowercase
-nums = "0123456789"
-hexnums = nums + "ABCDEFabcdef"
-alphanums = alphas + nums
-_bslash = chr(92)
-printables = "".join(c for c in string.printable if c not in string.whitespace)
-
-class ParseBaseException(Exception):
- """base exception class for all parsing runtime exceptions"""
- # Performance tuning: we construct a *lot* of these, so keep this
- # constructor as small and fast as possible
- def __init__( self, pstr, loc=0, msg=None, elem=None ):
- self.loc = loc
- if msg is None:
- self.msg = pstr
- self.pstr = ""
- else:
- self.msg = msg
- self.pstr = pstr
- self.parserElement = elem
- self.args = (pstr, loc, msg)
-
- @classmethod
- def _from_exception(cls, pe):
- """
- internal factory method to simplify creating one type of ParseException
- from another - avoids having __init__ signature conflicts among subclasses
- """
- return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
-
- def __getattr__( self, aname ):
- """supported attributes by name are:
- - lineno - returns the line number of the exception text
- - col - returns the column number of the exception text
- - line - returns the line containing the exception text
- """
- if( aname == "lineno" ):
- return lineno( self.loc, self.pstr )
- elif( aname in ("col", "column") ):
- return col( self.loc, self.pstr )
- elif( aname == "line" ):
- return line( self.loc, self.pstr )
- else:
- raise AttributeError(aname)
-
- def __str__( self ):
- return "%s (at char %d), (line:%d, col:%d)" % \
- ( self.msg, self.loc, self.lineno, self.column )
- def __repr__( self ):
- return _ustr(self)
- def markInputline( self, markerString = ">!<" ):
- """Extracts the exception line from the input string, and marks
- the location of the exception with a special symbol.
- """
- line_str = self.line
- line_column = self.column - 1
- if markerString:
- line_str = "".join((line_str[:line_column],
- markerString, line_str[line_column:]))
- return line_str.strip()
- def __dir__(self):
- return "lineno col line".split() + dir(type(self))
-
-class ParseException(ParseBaseException):
- """
- Exception thrown when parse expressions don't match class;
- supported attributes by name are:
- - lineno - returns the line number of the exception text
- - col - returns the column number of the exception text
- - line - returns the line containing the exception text
-
- Example::
- try:
- Word(nums).setName("integer").parseString("ABC")
- except ParseException as pe:
- print(pe)
- print("column: {}".format(pe.col))
-
- prints::
- Expected integer (at char 0), (line:1, col:1)
- column: 1
- """
- pass
-
-class ParseFatalException(ParseBaseException):
- """user-throwable exception thrown when inconsistent parse content
- is found; stops all parsing immediately"""
- pass
-
-class ParseSyntaxException(ParseFatalException):
- """just like L{ParseFatalException}, but thrown internally when an
- L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
- immediately because an unbacktrackable syntax error has been found"""
- pass
-
-#~ class ReparseException(ParseBaseException):
- #~ """Experimental class - parse actions can raise this exception to cause
- #~ pyparsing to reparse the input string:
- #~ - with a modified input string, and/or
- #~ - with a modified start location
- #~ Set the values of the ReparseException in the constructor, and raise the
- #~ exception in a parse action to cause pyparsing to use the new string/location.
- #~ Setting the values as None causes no change to be made.
- #~ """
- #~ def __init_( self, newstring, restartLoc ):
- #~ self.newParseText = newstring
- #~ self.reparseLoc = restartLoc
-
-class RecursiveGrammarException(Exception):
- """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
- def __init__( self, parseElementList ):
- self.parseElementTrace = parseElementList
-
- def __str__( self ):
- return "RecursiveGrammarException: %s" % self.parseElementTrace
-
-class _ParseResultsWithOffset(object):
- def __init__(self,p1,p2):
- self.tup = (p1,p2)
- def __getitem__(self,i):
- return self.tup[i]
- def __repr__(self):
- return repr(self.tup[0])
- def setOffset(self,i):
- self.tup = (self.tup[0],i)
-
-class ParseResults(object):
- """
- Structured parse results, to provide multiple means of access to the parsed data:
- - as a list (C{len(results)})
- - by list index (C{results[0], results[1]}, etc.)
- - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
-
- Example::
- integer = Word(nums)
- date_str = (integer.setResultsName("year") + '/'
- + integer.setResultsName("month") + '/'
- + integer.setResultsName("day"))
- # equivalent form:
- # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
- # parseString returns a ParseResults object
- result = date_str.parseString("1999/12/31")
-
- def test(s, fn=repr):
- print("%s -> %s" % (s, fn(eval(s))))
- test("list(result)")
- test("result[0]")
- test("result['month']")
- test("result.day")
- test("'month' in result")
- test("'minutes' in result")
- test("result.dump()", str)
- prints::
- list(result) -> ['1999', '/', '12', '/', '31']
- result[0] -> '1999'
- result['month'] -> '12'
- result.day -> '31'
- 'month' in result -> True
- 'minutes' in result -> False
- result.dump() -> ['1999', '/', '12', '/', '31']
- - day: 31
- - month: 12
- - year: 1999
- """
- def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
- if isinstance(toklist, cls):
- return toklist
- retobj = object.__new__(cls)
- retobj.__doinit = True
- return retobj
-
- # Performance tuning: we construct a *lot* of these, so keep this
- # constructor as small and fast as possible
- def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
- if self.__doinit:
- self.__doinit = False
- self.__name = None
- self.__parent = None
- self.__accumNames = {}
- self.__asList = asList
- self.__modal = modal
- if toklist is None:
- toklist = []
- if isinstance(toklist, list):
- self.__toklist = toklist[:]
- elif isinstance(toklist, _generatorType):
- self.__toklist = list(toklist)
- else:
- self.__toklist = [toklist]
- self.__tokdict = dict()
-
- if name is not None and name:
- if not modal:
- self.__accumNames[name] = 0
- if isinstance(name,int):
- name = _ustr(name) # will always return a str, but use _ustr for consistency
- self.__name = name
- if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
- if isinstance(toklist,basestring):
- toklist = [ toklist ]
- if asList:
- if isinstance(toklist,ParseResults):
- self[name] = _ParseResultsWithOffset(toklist.copy(),0)
- else:
- self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
- self[name].__name = name
- else:
- try:
- self[name] = toklist[0]
- except (KeyError,TypeError,IndexError):
- self[name] = toklist
-
- def __getitem__( self, i ):
- if isinstance( i, (int,slice) ):
- return self.__toklist[i]
- else:
- if i not in self.__accumNames:
- return self.__tokdict[i][-1][0]
- else:
- return ParseResults([ v[0] for v in self.__tokdict[i] ])
-
- def __setitem__( self, k, v, isinstance=isinstance ):
- if isinstance(v,_ParseResultsWithOffset):
- self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
- sub = v[0]
- elif isinstance(k,(int,slice)):
- self.__toklist[k] = v
- sub = v
- else:
- self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
- sub = v
- if isinstance(sub,ParseResults):
- sub.__parent = wkref(self)
-
- def __delitem__( self, i ):
- if isinstance(i,(int,slice)):
- mylen = len( self.__toklist )
- del self.__toklist[i]
-
- # convert int to slice
- if isinstance(i, int):
- if i < 0:
- i += mylen
- i = slice(i, i+1)
- # get removed indices
- removed = list(range(*i.indices(mylen)))
- removed.reverse()
- # fixup indices in token dictionary
- for name,occurrences in self.__tokdict.items():
- for j in removed:
- for k, (value, position) in enumerate(occurrences):
- occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
- else:
- del self.__tokdict[i]
-
- def __contains__( self, k ):
- return k in self.__tokdict
-
- def __len__( self ): return len( self.__toklist )
- def __bool__(self): return ( not not self.__toklist )
- __nonzero__ = __bool__
- def __iter__( self ): return iter( self.__toklist )
- def __reversed__( self ): return iter( self.__toklist[::-1] )
- def _iterkeys( self ):
- if hasattr(self.__tokdict, "iterkeys"):
- return self.__tokdict.iterkeys()
- else:
- return iter(self.__tokdict)
-
- def _itervalues( self ):
- return (self[k] for k in self._iterkeys())
-
- def _iteritems( self ):
- return ((k, self[k]) for k in self._iterkeys())
-
- if PY_3:
- keys = _iterkeys
- """Returns an iterator of all named result keys (Python 3.x only)."""
-
- values = _itervalues
- """Returns an iterator of all named result values (Python 3.x only)."""
-
- items = _iteritems
- """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
-
- else:
- iterkeys = _iterkeys
- """Returns an iterator of all named result keys (Python 2.x only)."""
-
- itervalues = _itervalues
- """Returns an iterator of all named result values (Python 2.x only)."""
-
- iteritems = _iteritems
- """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
-
- def keys( self ):
- """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
- return list(self.iterkeys())
-
- def values( self ):
- """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
- return list(self.itervalues())
-
- def items( self ):
- """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
- return list(self.iteritems())
-
- def haskeys( self ):
- """Since keys() returns an iterator, this method is helpful in bypassing
- code that looks for the existence of any defined results names."""
- return bool(self.__tokdict)
-
- def pop( self, *args, **kwargs):
- """
- Removes and returns item at specified index (default=C{last}).
- Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
- argument or an integer argument, it will use C{list} semantics
- and pop tokens from the list of parsed tokens. If passed a
- non-integer argument (most likely a string), it will use C{dict}
- semantics and pop the corresponding value from any defined
- results names. A second default return value argument is
- supported, just as in C{dict.pop()}.
-
- Example::
- def remove_first(tokens):
- tokens.pop(0)
- print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
- print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
-
- label = Word(alphas)
- patt = label("LABEL") + OneOrMore(Word(nums))
- print(patt.parseString("AAB 123 321").dump())
-
- # Use pop() in a parse action to remove named result (note that corresponding value is not
- # removed from list form of results)
- def remove_LABEL(tokens):
- tokens.pop("LABEL")
- return tokens
- patt.addParseAction(remove_LABEL)
- print(patt.parseString("AAB 123 321").dump())
- prints::
- ['AAB', '123', '321']
- - LABEL: AAB
-
- ['AAB', '123', '321']
- """
- if not args:
- args = [-1]
- for k,v in kwargs.items():
- if k == 'default':
- args = (args[0], v)
- else:
- raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
- if (isinstance(args[0], int) or
- len(args) == 1 or
- args[0] in self):
- index = args[0]
- ret = self[index]
- del self[index]
- return ret
- else:
- defaultvalue = args[1]
- return defaultvalue
-
- def get(self, key, defaultValue=None):
- """
- Returns named result matching the given key, or if there is no
- such name, then returns the given C{defaultValue} or C{None} if no
- C{defaultValue} is specified.
-
- Similar to C{dict.get()}.
-
- Example::
- integer = Word(nums)
- date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
- result = date_str.parseString("1999/12/31")
- print(result.get("year")) # -> '1999'
- print(result.get("hour", "not specified")) # -> 'not specified'
- print(result.get("hour")) # -> None
- """
- if key in self:
- return self[key]
- else:
- return defaultValue
-
- def insert( self, index, insStr ):
- """
- Inserts new element at location index in the list of parsed tokens.
-
- Similar to C{list.insert()}.
-
- Example::
- print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
-
- # use a parse action to insert the parse location in the front of the parsed results
- def insert_locn(locn, tokens):
- tokens.insert(0, locn)
- print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
- """
- self.__toklist.insert(index, insStr)
- # fixup indices in token dictionary
- for name,occurrences in self.__tokdict.items():
- for k, (value, position) in enumerate(occurrences):
- occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
-
- def append( self, item ):
- """
- Add single element to end of ParseResults list of elements.
-
- Example::
- print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
-
- # use a parse action to compute the sum of the parsed integers, and add it to the end
- def append_sum(tokens):
- tokens.append(sum(map(int, tokens)))
- print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
- """
- self.__toklist.append(item)
-
- def extend( self, itemseq ):
- """
- Add sequence of elements to end of ParseResults list of elements.
-
- Example::
- patt = OneOrMore(Word(alphas))
-
- # use a parse action to append the reverse of the matched strings, to make a palindrome
- def make_palindrome(tokens):
- tokens.extend(reversed([t[::-1] for t in tokens]))
- return ''.join(tokens)
- print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
- """
- if isinstance(itemseq, ParseResults):
- self += itemseq
- else:
- self.__toklist.extend(itemseq)
-
- def clear( self ):
- """
- Clear all elements and results names.
- """
- del self.__toklist[:]
- self.__tokdict.clear()
-
- def __getattr__( self, name ):
- try:
- return self[name]
- except KeyError:
- return ""
-
- if name in self.__tokdict:
- if name not in self.__accumNames:
- return self.__tokdict[name][-1][0]
- else:
- return ParseResults([ v[0] for v in self.__tokdict[name] ])
- else:
- return ""
-
- def __add__( self, other ):
- ret = self.copy()
- ret += other
- return ret
-
- def __iadd__( self, other ):
- if other.__tokdict:
- offset = len(self.__toklist)
- addoffset = lambda a: offset if a<0 else a+offset
- otheritems = other.__tokdict.items()
- otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
- for (k,vlist) in otheritems for v in vlist]
- for k,v in otherdictitems:
- self[k] = v
- if isinstance(v[0],ParseResults):
- v[0].__parent = wkref(self)
-
- self.__toklist += other.__toklist
- self.__accumNames.update( other.__accumNames )
- return self
-
- def __radd__(self, other):
- if isinstance(other,int) and other == 0:
- # useful for merging many ParseResults using sum() builtin
- return self.copy()
- else:
- # this may raise a TypeError - so be it
- return other + self
-
- def __repr__( self ):
- return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
-
- def __str__( self ):
- return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
-
- def _asStringList( self, sep='' ):
- out = []
- for item in self.__toklist:
- if out and sep:
- out.append(sep)
- if isinstance( item, ParseResults ):
- out += item._asStringList()
- else:
- out.append( _ustr(item) )
- return out
-
- def asList( self ):
- """
- Returns the parse results as a nested list of matching tokens, all converted to strings.
-
- Example::
- patt = OneOrMore(Word(alphas))
- result = patt.parseString("sldkj lsdkj sldkj")
- # even though the result prints in string-like form, it is actually a pyparsing ParseResults
- print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
-
- # Use asList() to create an actual list
- result_list = result.asList()
- print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
- """
- return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
-
- def asDict( self ):
- """
- Returns the named parse results as a nested dictionary.
-
- Example::
- integer = Word(nums)
- date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
- result = date_str.parseString('12/31/1999')
- print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
-
- result_dict = result.asDict()
- print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
-
- # even though a ParseResults supports dict-like access, sometime you just need to have a dict
- import json
- print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
- print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
- """
- if PY_3:
- item_fn = self.items
- else:
- item_fn = self.iteritems
-
- def toItem(obj):
- if isinstance(obj, ParseResults):
- if obj.haskeys():
- return obj.asDict()
- else:
- return [toItem(v) for v in obj]
- else:
- return obj
-
- return dict((k,toItem(v)) for k,v in item_fn())
-
- def copy( self ):
- """
- Returns a new copy of a C{ParseResults} object.
- """
- ret = ParseResults( self.__toklist )
- ret.__tokdict = self.__tokdict.copy()
- ret.__parent = self.__parent
- ret.__accumNames.update( self.__accumNames )
- ret.__name = self.__name
- return ret
-
- def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
- """
- (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
- """
- nl = "\n"
- out = []
- namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
- for v in vlist)
- nextLevelIndent = indent + " "
-
- # collapse out indents if formatting is not desired
- if not formatted:
- indent = ""
- nextLevelIndent = ""
- nl = ""
-
- selfTag = None
- if doctag is not None:
- selfTag = doctag
- else:
- if self.__name:
- selfTag = self.__name
-
- if not selfTag:
- if namedItemsOnly:
- return ""
- else:
- selfTag = "ITEM"
-
- out += [ nl, indent, "<", selfTag, ">" ]
-
- for i,res in enumerate(self.__toklist):
- if isinstance(res,ParseResults):
- if i in namedItems:
- out += [ res.asXML(namedItems[i],
- namedItemsOnly and doctag is None,
- nextLevelIndent,
- formatted)]
- else:
- out += [ res.asXML(None,
- namedItemsOnly and doctag is None,
- nextLevelIndent,
- formatted)]
- else:
- # individual token, see if there is a name for it
- resTag = None
- if i in namedItems:
- resTag = namedItems[i]
- if not resTag:
- if namedItemsOnly:
- continue
- else:
- resTag = "ITEM"
- xmlBodyText = _xml_escape(_ustr(res))
- out += [ nl, nextLevelIndent, "<", resTag, ">",
- xmlBodyText,
- "</", resTag, ">" ]
-
- out += [ nl, indent, "</", selfTag, ">" ]
- return "".join(out)
-
- def __lookup(self,sub):
- for k,vlist in self.__tokdict.items():
- for v,loc in vlist:
- if sub is v:
- return k
- return None
-
- def getName(self):
- r"""
- Returns the results name for this token expression. Useful when several
- different expressions might match at a particular location.
-
- Example::
- integer = Word(nums)
- ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
- house_number_expr = Suppress('#') + Word(nums, alphanums)
- user_data = (Group(house_number_expr)("house_number")
- | Group(ssn_expr)("ssn")
- | Group(integer)("age"))
- user_info = OneOrMore(user_data)
-
- result = user_info.parseString("22 111-22-3333 #221B")
- for item in result:
- print(item.getName(), ':', item[0])
- prints::
- age : 22
- ssn : 111-22-3333
- house_number : 221B
- """
- if self.__name:
- return self.__name
- elif self.__parent:
- par = self.__parent()
- if par:
- return par.__lookup(self)
- else:
- return None
- elif (len(self) == 1 and
- len(self.__tokdict) == 1 and
- next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
- return next(iter(self.__tokdict.keys()))
- else:
- return None
-
- def dump(self, indent='', depth=0, full=True):
- """
- Diagnostic method for listing out the contents of a C{ParseResults}.
- Accepts an optional C{indent} argument so that this string can be embedded
- in a nested display of other data.
-
- Example::
- integer = Word(nums)
- date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
- result = date_str.parseString('12/31/1999')
- print(result.dump())
- prints::
- ['12', '/', '31', '/', '1999']
- - day: 1999
- - month: 31
- - year: 12
- """
- out = []
- NL = '\n'
- out.append( indent+_ustr(self.asList()) )
- if full:
- if self.haskeys():
- items = sorted((str(k), v) for k,v in self.items())
- for k,v in items:
- if out:
- out.append(NL)
- out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
- if isinstance(v,ParseResults):
- if v:
- out.append( v.dump(indent,depth+1) )
- else:
- out.append(_ustr(v))
- else:
- out.append(repr(v))
- elif any(isinstance(vv,ParseResults) for vv in self):
- v = self
- for i,vv in enumerate(v):
- if isinstance(vv,ParseResults):
- out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
- else:
- out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
-
- return "".join(out)
-
- def pprint(self, *args, **kwargs):
- """
- Pretty-printer for parsed results as a list, using the C{pprint} module.
- Accepts additional positional or keyword args as defined for the
- C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
-
- Example::
- ident = Word(alphas, alphanums)
- num = Word(nums)
- func = Forward()
- term = ident | num | Group('(' + func + ')')
- func <<= ident + Group(Optional(delimitedList(term)))
- result = func.parseString("fna a,b,(fnb c,d,200),100")
- result.pprint(width=40)
- prints::
- ['fna',
- ['a',
- 'b',
- ['(', 'fnb', ['c', 'd', '200'], ')'],
- '100']]
- """
- pprint.pprint(self.asList(), *args, **kwargs)
-
- # add support for pickle protocol
- def __getstate__(self):
- return ( self.__toklist,
- ( self.__tokdict.copy(),
- self.__parent is not None and self.__parent() or None,
- self.__accumNames,
- self.__name ) )
-
- def __setstate__(self,state):
- self.__toklist = state[0]
- (self.__tokdict,
- par,
- inAccumNames,
- self.__name) = state[1]
- self.__accumNames = {}
- self.__accumNames.update(inAccumNames)
- if par is not None:
- self.__parent = wkref(par)
- else:
- self.__parent = None
-
- def __getnewargs__(self):
- return self.__toklist, self.__name, self.__asList, self.__modal
-
- def __dir__(self):
- return (dir(type(self)) + list(self.keys()))
-
-MutableMapping.register(ParseResults)
-
-def col (loc,strg):
- """Returns current column within a string, counting newlines as line separators.
- The first column is number 1.
-
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
- on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
- """
- s = strg
- return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
-
-def lineno(loc,strg):
- """Returns current line number within a string, counting newlines as line separators.
- The first line is number 1.
-
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
- on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
- """
- return strg.count("\n",0,loc) + 1
-
-def line( loc, strg ):
- """Returns the line of text containing loc within a string, counting newlines as line separators.
- """
- lastCR = strg.rfind("\n", 0, loc)
- nextCR = strg.find("\n", loc)
- if nextCR >= 0:
- return strg[lastCR+1:nextCR]
- else:
- return strg[lastCR+1:]
-
-def _defaultStartDebugAction( instring, loc, expr ):
- print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
-
-def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
- print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
-
-def _defaultExceptionDebugAction( instring, loc, expr, exc ):
- print ("Exception raised:" + _ustr(exc))
-
-def nullDebugAction(*args):
- """'Do-nothing' debug action, to suppress debugging output during parsing."""
- pass
-
-# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
-#~ 'decorator to trim function calls to match the arity of the target'
-#~ def _trim_arity(func, maxargs=3):
- #~ if func in singleArgBuiltins:
- #~ return lambda s,l,t: func(t)
- #~ limit = 0
- #~ foundArity = False
- #~ def wrapper(*args):
- #~ nonlocal limit,foundArity
- #~ while 1:
- #~ try:
- #~ ret = func(*args[limit:])
- #~ foundArity = True
- #~ return ret
- #~ except TypeError:
- #~ if limit == maxargs or foundArity:
- #~ raise
- #~ limit += 1
- #~ continue
- #~ return wrapper
-
-# this version is Python 2.x-3.x cross-compatible
-'decorator to trim function calls to match the arity of the target'
-def _trim_arity(func, maxargs=2):
- if func in singleArgBuiltins:
- return lambda s,l,t: func(t)
- limit = [0]
- foundArity = [False]
-
- # traceback return data structure changed in Py3.5 - normalize back to plain tuples
- if system_version[:2] >= (3,5):
- def extract_stack(limit=0):
- # special handling for Python 3.5.0 - extra deep call stack by 1
- offset = -3 if system_version == (3,5,0) else -2
- frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
- return [frame_summary[:2]]
- def extract_tb(tb, limit=0):
- frames = traceback.extract_tb(tb, limit=limit)
- frame_summary = frames[-1]
- return [frame_summary[:2]]
- else:
- extract_stack = traceback.extract_stack
- extract_tb = traceback.extract_tb
-
- # synthesize what would be returned by traceback.extract_stack at the call to
- # user's parse action 'func', so that we don't incur call penalty at parse time
-
- LINE_DIFF = 6
- # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
- # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
- this_line = extract_stack(limit=2)[-1]
- pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
-
- def wrapper(*args):
- while 1:
- try:
- ret = func(*args[limit[0]:])
- foundArity[0] = True
- return ret
- except TypeError:
- # re-raise TypeErrors if they did not come from our arity testing
- if foundArity[0]:
- raise
- else:
- try:
- tb = sys.exc_info()[-1]
- if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
- raise
- finally:
- del tb
-
- if limit[0] <= maxargs:
- limit[0] += 1
- continue
- raise
-
- # copy func name to wrapper for sensible debug output
- func_name = "<parse action>"
- try:
- func_name = getattr(func, '__name__',
- getattr(func, '__class__').__name__)
- except Exception:
- func_name = str(func)
- wrapper.__name__ = func_name
-
- return wrapper
-
-class ParserElement(object):
- """Abstract base level parser element class."""
- DEFAULT_WHITE_CHARS = " \n\t\r"
- verbose_stacktrace = False
-
- @staticmethod
- def setDefaultWhitespaceChars( chars ):
- r"""
- Overrides the default whitespace chars
-
- Example::
- # default whitespace chars are space, <TAB> and newline
- OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
-
- # change to just treat newline as significant
- ParserElement.setDefaultWhitespaceChars(" \t")
- OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
- """
- ParserElement.DEFAULT_WHITE_CHARS = chars
-
- @staticmethod
- def inlineLiteralsUsing(cls):
- """
- Set class to be used for inclusion of string literals into a parser.
-
- Example::
- # default literal class used is Literal
- integer = Word(nums)
- date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
- date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
-
-
- # change to Suppress
- ParserElement.inlineLiteralsUsing(Suppress)
- date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
-
- date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
- """
- ParserElement._literalStringClass = cls
-
- def __init__( self, savelist=False ):
- self.parseAction = list()
- self.failAction = None
- #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
- self.strRepr = None
- self.resultsName = None
- self.saveAsList = savelist
- self.skipWhitespace = True
- self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
- self.copyDefaultWhiteChars = True
- self.mayReturnEmpty = False # used when checking for left-recursion
- self.keepTabs = False
- self.ignoreExprs = list()
- self.debug = False
- self.streamlined = False
- self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
- self.errmsg = ""
- self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
- self.debugActions = ( None, None, None ) #custom debug actions
- self.re = None
- self.callPreparse = True # used to avoid redundant calls to preParse
- self.callDuringTry = False
-
- def copy( self ):
- """
- Make a copy of this C{ParserElement}. Useful for defining different parse actions
- for the same parsing pattern, using copies of the original parse element.
-
- Example::
- integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
- integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
- integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
-
- print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
- prints::
- [5120, 100, 655360, 268435456]
- Equivalent form of C{expr.copy()} is just C{expr()}::
- integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
- """
- cpy = copy.copy( self )
- cpy.parseAction = self.parseAction[:]
- cpy.ignoreExprs = self.ignoreExprs[:]
- if self.copyDefaultWhiteChars:
- cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
- return cpy
-
- def setName( self, name ):
- """
- Define name for this expression, makes debugging and exception messages clearer.
-
- Example::
- Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
- Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
- """
- self.name = name
- self.errmsg = "Expected " + self.name
- if hasattr(self,"exception"):
- self.exception.msg = self.errmsg
- return self
-
- def setResultsName( self, name, listAllMatches=False ):
- """
- Define name for referencing matching tokens as a nested attribute
- of the returned parse results.
- NOTE: this returns a *copy* of the original C{ParserElement} object;
- this is so that the client can define a basic element, such as an
- integer, and reference it in multiple places with different names.
-
- You can also set results names using the abbreviated syntax,
- C{expr("name")} in place of C{expr.setResultsName("name")} -
- see L{I{__call__}<__call__>}.
-
- Example::
- date_str = (integer.setResultsName("year") + '/'
- + integer.setResultsName("month") + '/'
- + integer.setResultsName("day"))
-
- # equivalent form:
- date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
- """
- newself = self.copy()
- if name.endswith("*"):
- name = name[:-1]
- listAllMatches=True
- newself.resultsName = name
- newself.modalResults = not listAllMatches
- return newself
-
- def setBreak(self,breakFlag = True):
- """Method to invoke the Python pdb debugger when this element is
- about to be parsed. Set C{breakFlag} to True to enable, False to
- disable.
- """
- if breakFlag:
- _parseMethod = self._parse
- def breaker(instring, loc, doActions=True, callPreParse=True):
- import pdb
- pdb.set_trace()
- return _parseMethod( instring, loc, doActions, callPreParse )
- breaker._originalParseMethod = _parseMethod
- self._parse = breaker
- else:
- if hasattr(self._parse,"_originalParseMethod"):
- self._parse = self._parse._originalParseMethod
- return self
-
- def setParseAction( self, *fns, **kwargs ):
- """
- Define one or more actions to perform when successfully matching parse element definition.
- Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
- C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
- - s = the original string being parsed (see note below)
- - loc = the location of the matching substring
- - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
- If the functions in fns modify the tokens, they can return them as the return
- value from fn, and the modified list of tokens will replace the original.
- Otherwise, fn does not need to return any value.
-
- Optional keyword arguments:
- - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
-
- Note: the default parsing behavior is to expand tabs in the input string
- before starting the parsing process. See L{I{parseString}<parseString>} for more information
- on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
- consistent view of the parsed string, the parse location, and line and column
- positions within the parsed string.
-
- Example::
- integer = Word(nums)
- date_str = integer + '/' + integer + '/' + integer
-
- date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
-
- # use parse action to convert to ints at parse time
- integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
- date_str = integer + '/' + integer + '/' + integer
-
- # note that integer fields are now ints, not strings
- date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
- """
- self.parseAction = list(map(_trim_arity, list(fns)))
- self.callDuringTry = kwargs.get("callDuringTry", False)
- return self
-
- def addParseAction( self, *fns, **kwargs ):
- """
- Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
-
- See examples in L{I{copy}<copy>}.
- """
- self.parseAction += list(map(_trim_arity, list(fns)))
- self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
- return self
-
- def addCondition(self, *fns, **kwargs):
- """Add a boolean predicate function to expression's list of parse actions. See
- L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
- functions passed to C{addCondition} need to return boolean success/fail of the condition.
-
- Optional keyword arguments:
- - message = define a custom message to be used in the raised exception
- - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
-
- Example::
- integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
- year_int = integer.copy()
- year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
- date_str = year_int + '/' + integer + '/' + integer
-
- result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
- """
- msg = kwargs.get("message", "failed user-defined condition")
- exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
- for fn in fns:
- def pa(s,l,t):
- if not bool(_trim_arity(fn)(s,l,t)):
- raise exc_type(s,l,msg)
- self.parseAction.append(pa)
- self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
- return self
-
- def setFailAction( self, fn ):
- """Define action to perform if parsing fails at this expression.
- Fail acton fn is a callable function that takes the arguments
- C{fn(s,loc,expr,err)} where:
- - s = string being parsed
- - loc = location where expression match was attempted and failed
- - expr = the parse expression that failed
- - err = the exception thrown
- The function returns no value. It may throw C{L{ParseFatalException}}
- if it is desired to stop parsing immediately."""
- self.failAction = fn
- return self
-
- def _skipIgnorables( self, instring, loc ):
- exprsFound = True
- while exprsFound:
- exprsFound = False
- for e in self.ignoreExprs:
- try:
- while 1:
- loc,dummy = e._parse( instring, loc )
- exprsFound = True
- except ParseException:
- pass
- return loc
-
- def preParse( self, instring, loc ):
- if self.ignoreExprs:
- loc = self._skipIgnorables( instring, loc )
-
- if self.skipWhitespace:
- wt = self.whiteChars
- instrlen = len(instring)
- while loc < instrlen and instring[loc] in wt:
- loc += 1
-
- return loc
-
- def parseImpl( self, instring, loc, doActions=True ):
- return loc, []
-
- def postParse( self, instring, loc, tokenlist ):
- return tokenlist
-
- #~ @profile
- def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
- debugging = ( self.debug ) #and doActions )
-
- if debugging or self.failAction:
- #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
- if (self.debugActions[0] ):
- self.debugActions[0]( instring, loc, self )
- if callPreParse and self.callPreparse:
- preloc = self.preParse( instring, loc )
- else:
- preloc = loc
- tokensStart = preloc
- try:
- try:
- loc,tokens = self.parseImpl( instring, preloc, doActions )
- except IndexError:
- raise ParseException( instring, len(instring), self.errmsg, self )
- except ParseBaseException as err:
- #~ print ("Exception raised:", err)
- if self.debugActions[2]:
- self.debugActions[2]( instring, tokensStart, self, err )
- if self.failAction:
- self.failAction( instring, tokensStart, self, err )
- raise
- else:
- if callPreParse and self.callPreparse:
- preloc = self.preParse( instring, loc )
- else:
- preloc = loc
- tokensStart = preloc
- if self.mayIndexError or preloc >= len(instring):
- try:
- loc,tokens = self.parseImpl( instring, preloc, doActions )
- except IndexError:
- raise ParseException( instring, len(instring), self.errmsg, self )
- else:
- loc,tokens = self.parseImpl( instring, preloc, doActions )
-
- tokens = self.postParse( instring, loc, tokens )
-
- retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
- if self.parseAction and (doActions or self.callDuringTry):
- if debugging:
- try:
- for fn in self.parseAction:
- tokens = fn( instring, tokensStart, retTokens )
- if tokens is not None:
- retTokens = ParseResults( tokens,
- self.resultsName,
- asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
- modal=self.modalResults )
- except ParseBaseException as err:
- #~ print "Exception raised in user parse action:", err
- if (self.debugActions[2] ):
- self.debugActions[2]( instring, tokensStart, self, err )
- raise
- else:
- for fn in self.parseAction:
- tokens = fn( instring, tokensStart, retTokens )
- if tokens is not None:
- retTokens = ParseResults( tokens,
- self.resultsName,
- asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
- modal=self.modalResults )
- if debugging:
- #~ print ("Matched",self,"->",retTokens.asList())
- if (self.debugActions[1] ):
- self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
-
- return loc, retTokens
-
- def tryParse( self, instring, loc ):
- try:
- return self._parse( instring, loc, doActions=False )[0]
- except ParseFatalException:
- raise ParseException( instring, loc, self.errmsg, self)
-
- def canParseNext(self, instring, loc):
- try:
- self.tryParse(instring, loc)
- except (ParseException, IndexError):
- return False
- else:
- return True
-
- class _UnboundedCache(object):
- def __init__(self):
- cache = {}
- self.not_in_cache = not_in_cache = object()
-
- def get(self, key):
- return cache.get(key, not_in_cache)
-
- def set(self, key, value):
- cache[key] = value
-
- def clear(self):
- cache.clear()
-
- def cache_len(self):
- return len(cache)
-
- self.get = types.MethodType(get, self)
- self.set = types.MethodType(set, self)
- self.clear = types.MethodType(clear, self)
- self.__len__ = types.MethodType(cache_len, self)
-
- if _OrderedDict is not None:
- class _FifoCache(object):
- def __init__(self, size):
- self.not_in_cache = not_in_cache = object()
-
- cache = _OrderedDict()
-
- def get(self, key):
- return cache.get(key, not_in_cache)
-
- def set(self, key, value):
- cache[key] = value
- while len(cache) > size:
- try:
- cache.popitem(False)
- except KeyError:
- pass
-
- def clear(self):
- cache.clear()
-
- def cache_len(self):
- return len(cache)
-
- self.get = types.MethodType(get, self)
- self.set = types.MethodType(set, self)
- self.clear = types.MethodType(clear, self)
- self.__len__ = types.MethodType(cache_len, self)
-
- else:
- class _FifoCache(object):
- def __init__(self, size):
- self.not_in_cache = not_in_cache = object()
-
- cache = {}
- key_fifo = collections.deque([], size)
-
- def get(self, key):
- return cache.get(key, not_in_cache)
-
- def set(self, key, value):
- cache[key] = value
- while len(key_fifo) > size:
- cache.pop(key_fifo.popleft(), None)
- key_fifo.append(key)
-
- def clear(self):
- cache.clear()
- key_fifo.clear()
-
- def cache_len(self):
- return len(cache)
-
- self.get = types.MethodType(get, self)
- self.set = types.MethodType(set, self)
- self.clear = types.MethodType(clear, self)
- self.__len__ = types.MethodType(cache_len, self)
-
- # argument cache for optimizing repeated calls when backtracking through recursive expressions
- packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
- packrat_cache_lock = RLock()
- packrat_cache_stats = [0, 0]
-
- # this method gets repeatedly called during backtracking with the same arguments -
- # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
- def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
- HIT, MISS = 0, 1
- lookup = (self, instring, loc, callPreParse, doActions)
- with ParserElement.packrat_cache_lock:
- cache = ParserElement.packrat_cache
- value = cache.get(lookup)
- if value is cache.not_in_cache:
- ParserElement.packrat_cache_stats[MISS] += 1
- try:
- value = self._parseNoCache(instring, loc, doActions, callPreParse)
- except ParseBaseException as pe:
- # cache a copy of the exception, without the traceback
- cache.set(lookup, pe.__class__(*pe.args))
- raise
- else:
- cache.set(lookup, (value[0], value[1].copy()))
- return value
- else:
- ParserElement.packrat_cache_stats[HIT] += 1
- if isinstance(value, Exception):
- raise value
- return (value[0], value[1].copy())
-
- _parse = _parseNoCache
-
- @staticmethod
- def resetCache():
- ParserElement.packrat_cache.clear()
- ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
-
- _packratEnabled = False
- @staticmethod
- def enablePackrat(cache_size_limit=128):
- """Enables "packrat" parsing, which adds memoizing to the parsing logic.
- Repeated parse attempts at the same string location (which happens
- often in many complex grammars) can immediately return a cached value,
- instead of re-executing parsing/validating code. Memoizing is done of
- both valid results and parsing exceptions.
-
- Parameters:
- - cache_size_limit - (default=C{128}) - if an integer value is provided
- will limit the size of the packrat cache; if None is passed, then
- the cache size will be unbounded; if 0 is passed, the cache will
- be effectively disabled.
-
- This speedup may break existing programs that use parse actions that
- have side-effects. For this reason, packrat parsing is disabled when
- you first import pyparsing. To activate the packrat feature, your
- program must call the class method C{ParserElement.enablePackrat()}. If
- your program uses C{psyco} to "compile as you go", you must call
- C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
- Python will crash. For best results, call C{enablePackrat()} immediately
- after importing pyparsing.
-
- Example::
- import pyparsing
- pyparsing.ParserElement.enablePackrat()
- """
- if not ParserElement._packratEnabled:
- ParserElement._packratEnabled = True
- if cache_size_limit is None:
- ParserElement.packrat_cache = ParserElement._UnboundedCache()
- else:
- ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
- ParserElement._parse = ParserElement._parseCache
-
- def parseString( self, instring, parseAll=False ):
- """
- Execute the parse expression with the given string.
- This is the main interface to the client code, once the complete
- expression has been built.
-
- If you want the grammar to require that the entire input string be
- successfully parsed, then set C{parseAll} to True (equivalent to ending
- the grammar with C{L{StringEnd()}}).
-
- Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
- in order to report proper column numbers in parse actions.
- If the input string contains tabs and
- the grammar uses parse actions that use the C{loc} argument to index into the
- string being parsed, you can ensure you have a consistent view of the input
- string by:
- - calling C{parseWithTabs} on your grammar before calling C{parseString}
- (see L{I{parseWithTabs}<parseWithTabs>})
- - define your parse action using the full C{(s,loc,toks)} signature, and
- reference the input string using the parse action's C{s} argument
- - explictly expand the tabs in your input string before calling
- C{parseString}
-
- Example::
- Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
- Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
- """
- ParserElement.resetCache()
- if not self.streamlined:
- self.streamline()
- #~ self.saveAsList = True
- for e in self.ignoreExprs:
- e.streamline()
- if not self.keepTabs:
- instring = instring.expandtabs()
- try:
- loc, tokens = self._parse( instring, 0 )
- if parseAll:
- loc = self.preParse( instring, loc )
- se = Empty() + StringEnd()
- se._parse( instring, loc )
- except ParseBaseException as exc:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise exc
- else:
- return tokens
-
- def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
- """
- Scan the input string for expression matches. Each match will return the
- matching tokens, start location, and end location. May be called with optional
- C{maxMatches} argument, to clip scanning after 'n' matches are found. If
- C{overlap} is specified, then overlapping matches will be reported.
-
- Note that the start and end locations are reported relative to the string
- being parsed. See L{I{parseString}<parseString>} for more information on parsing
- strings with embedded tabs.
-
- Example::
- source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
- print(source)
- for tokens,start,end in Word(alphas).scanString(source):
- print(' '*start + '^'*(end-start))
- print(' '*start + tokens[0])
-
- prints::
-
- sldjf123lsdjjkf345sldkjf879lkjsfd987
- ^^^^^
- sldjf
- ^^^^^^^
- lsdjjkf
- ^^^^^^
- sldkjf
- ^^^^^^
- lkjsfd
- """
- if not self.streamlined:
- self.streamline()
- for e in self.ignoreExprs:
- e.streamline()
-
- if not self.keepTabs:
- instring = _ustr(instring).expandtabs()
- instrlen = len(instring)
- loc = 0
- preparseFn = self.preParse
- parseFn = self._parse
- ParserElement.resetCache()
- matches = 0
- try:
- while loc <= instrlen and matches < maxMatches:
- try:
- preloc = preparseFn( instring, loc )
- nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
- except ParseException:
- loc = preloc+1
- else:
- if nextLoc > loc:
- matches += 1
- yield tokens, preloc, nextLoc
- if overlap:
- nextloc = preparseFn( instring, loc )
- if nextloc > loc:
- loc = nextLoc
- else:
- loc += 1
- else:
- loc = nextLoc
- else:
- loc = preloc+1
- except ParseBaseException as exc:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise exc
-
- def transformString( self, instring ):
- """
- Extension to C{L{scanString}}, to modify matching text with modified tokens that may
- be returned from a parse action. To use C{transformString}, define a grammar and
- attach a parse action to it that modifies the returned token list.
- Invoking C{transformString()} on a target string will then scan for matches,
- and replace the matched text patterns according to the logic in the parse
- action. C{transformString()} returns the resulting transformed string.
-
- Example::
- wd = Word(alphas)
- wd.setParseAction(lambda toks: toks[0].title())
-
- print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
- Prints::
- Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
- """
- out = []
- lastE = 0
- # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
- # keep string locs straight between transformString and scanString
- self.keepTabs = True
- try:
- for t,s,e in self.scanString( instring ):
- out.append( instring[lastE:s] )
- if t:
- if isinstance(t,ParseResults):
- out += t.asList()
- elif isinstance(t,list):
- out += t
- else:
- out.append(t)
- lastE = e
- out.append(instring[lastE:])
- out = [o for o in out if o]
- return "".join(map(_ustr,_flatten(out)))
- except ParseBaseException as exc:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise exc
-
- def searchString( self, instring, maxMatches=_MAX_INT ):
- """
- Another extension to C{L{scanString}}, simplifying the access to the tokens found
- to match the given parse expression. May be called with optional
- C{maxMatches} argument, to clip searching after 'n' matches are found.
-
- Example::
- # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
- cap_word = Word(alphas.upper(), alphas.lower())
-
- print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
-
- # the sum() builtin can be used to merge results into a single ParseResults object
- print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
- prints::
- [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
- ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
- """
- try:
- return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
- except ParseBaseException as exc:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise exc
-
- def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
- """
- Generator method to split a string using the given expression as a separator.
- May be called with optional C{maxsplit} argument, to limit the number of splits;
- and the optional C{includeSeparators} argument (default=C{False}), if the separating
- matching text should be included in the split results.
-
- Example::
- punc = oneOf(list(".,;:/-!?"))
- print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
- prints::
- ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
- """
- splits = 0
- last = 0
- for t,s,e in self.scanString(instring, maxMatches=maxsplit):
- yield instring[last:s]
- if includeSeparators:
- yield t[0]
- last = e
- yield instring[last:]
-
- def __add__(self, other ):
- """
- Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
- converts them to L{Literal}s by default.
-
- Example::
- greet = Word(alphas) + "," + Word(alphas) + "!"
- hello = "Hello, World!"
- print (hello, "->", greet.parseString(hello))
- Prints::
- Hello, World! -> ['Hello', ',', 'World', '!']
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return And( [ self, other ] )
-
- def __radd__(self, other ):
- """
- Implementation of + operator when left operand is not a C{L{ParserElement}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return other + self
-
- def __sub__(self, other):
- """
- Implementation of - operator, returns C{L{And}} with error stop
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return self + And._ErrorStop() + other
-
- def __rsub__(self, other ):
- """
- Implementation of - operator when left operand is not a C{L{ParserElement}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return other - self
-
- def __mul__(self,other):
- """
- Implementation of * operator, allows use of C{expr * 3} in place of
- C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
- tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
- may also include C{None} as in:
- - C{expr*(n,None)} or C{expr*(n,)} is equivalent
- to C{expr*n + L{ZeroOrMore}(expr)}
- (read as "at least n instances of C{expr}")
- - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
- (read as "0 to n instances of C{expr}")
- - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
- - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
-
- Note that C{expr*(None,n)} does not raise an exception if
- more than n exprs exist in the input stream; that is,
- C{expr*(None,n)} does not enforce a maximum number of expr
- occurrences. If this behavior is desired, then write
- C{expr*(None,n) + ~expr}
- """
- if isinstance(other,int):
- minElements, optElements = other,0
- elif isinstance(other,tuple):
- other = (other + (None, None))[:2]
- if other[0] is None:
- other = (0, other[1])
- if isinstance(other[0],int) and other[1] is None:
- if other[0] == 0:
- return ZeroOrMore(self)
- if other[0] == 1:
- return OneOrMore(self)
- else:
- return self*other[0] + ZeroOrMore(self)
- elif isinstance(other[0],int) and isinstance(other[1],int):
- minElements, optElements = other
- optElements -= minElements
- else:
- raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
- else:
- raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
-
- if minElements < 0:
- raise ValueError("cannot multiply ParserElement by negative value")
- if optElements < 0:
- raise ValueError("second tuple value must be greater or equal to first tuple value")
- if minElements == optElements == 0:
- raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
-
- if (optElements):
- def makeOptionalList(n):
- if n>1:
- return Optional(self + makeOptionalList(n-1))
- else:
- return Optional(self)
- if minElements:
- if minElements == 1:
- ret = self + makeOptionalList(optElements)
- else:
- ret = And([self]*minElements) + makeOptionalList(optElements)
- else:
- ret = makeOptionalList(optElements)
- else:
- if minElements == 1:
- ret = self
- else:
- ret = And([self]*minElements)
- return ret
-
- def __rmul__(self, other):
- return self.__mul__(other)
-
- def __or__(self, other ):
- """
- Implementation of | operator - returns C{L{MatchFirst}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return MatchFirst( [ self, other ] )
-
- def __ror__(self, other ):
- """
- Implementation of | operator when left operand is not a C{L{ParserElement}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return other | self
-
- def __xor__(self, other ):
- """
- Implementation of ^ operator - returns C{L{Or}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return Or( [ self, other ] )
-
- def __rxor__(self, other ):
- """
- Implementation of ^ operator when left operand is not a C{L{ParserElement}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return other ^ self
-
- def __and__(self, other ):
- """
- Implementation of & operator - returns C{L{Each}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return Each( [ self, other ] )
-
- def __rand__(self, other ):
- """
- Implementation of & operator when left operand is not a C{L{ParserElement}}
- """
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- if not isinstance( other, ParserElement ):
- warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
- SyntaxWarning, stacklevel=2)
- return None
- return other & self
-
- def __invert__( self ):
- """
- Implementation of ~ operator - returns C{L{NotAny}}
- """
- return NotAny( self )
-
- def __call__(self, name=None):
- """
- Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
-
- If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
- passed as C{True}.
-
- If C{name} is omitted, same as calling C{L{copy}}.
-
- Example::
- # these are equivalent
- userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
- userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
- """
- if name is not None:
- return self.setResultsName(name)
- else:
- return self.copy()
-
- def suppress( self ):
- """
- Suppresses the output of this C{ParserElement}; useful to keep punctuation from
- cluttering up returned output.
- """
- return Suppress( self )
-
- def leaveWhitespace( self ):
- """
- Disables the skipping of whitespace before matching the characters in the
- C{ParserElement}'s defined pattern. This is normally only used internally by
- the pyparsing module, but may be needed in some whitespace-sensitive grammars.
- """
- self.skipWhitespace = False
- return self
-
- def setWhitespaceChars( self, chars ):
- """
- Overrides the default whitespace chars
- """
- self.skipWhitespace = True
- self.whiteChars = chars
- self.copyDefaultWhiteChars = False
- return self
-
- def parseWithTabs( self ):
- """
- Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
- Must be called before C{parseString} when the input grammar contains elements that
- match C{<TAB>} characters.
- """
- self.keepTabs = True
- return self
-
- def ignore( self, other ):
- """
- Define expression to be ignored (e.g., comments) while doing pattern
- matching; may be called repeatedly, to define multiple comment or other
- ignorable patterns.
-
- Example::
- patt = OneOrMore(Word(alphas))
- patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
-
- patt.ignore(cStyleComment)
- patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
- """
- if isinstance(other, basestring):
- other = Suppress(other)
-
- if isinstance( other, Suppress ):
- if other not in self.ignoreExprs:
- self.ignoreExprs.append(other)
- else:
- self.ignoreExprs.append( Suppress( other.copy() ) )
- return self
-
- def setDebugActions( self, startAction, successAction, exceptionAction ):
- """
- Enable display of debugging messages while doing pattern matching.
- """
- self.debugActions = (startAction or _defaultStartDebugAction,
- successAction or _defaultSuccessDebugAction,
- exceptionAction or _defaultExceptionDebugAction)
- self.debug = True
- return self
-
- def setDebug( self, flag=True ):
- """
- Enable display of debugging messages while doing pattern matching.
- Set C{flag} to True to enable, False to disable.
-
- Example::
- wd = Word(alphas).setName("alphaword")
- integer = Word(nums).setName("numword")
- term = wd | integer
-
- # turn on debugging for wd
- wd.setDebug()
-
- OneOrMore(term).parseString("abc 123 xyz 890")
-
- prints::
- Match alphaword at loc 0(1,1)
- Matched alphaword -> ['abc']
- Match alphaword at loc 3(1,4)
- Exception raised:Expected alphaword (at char 4), (line:1, col:5)
- Match alphaword at loc 7(1,8)
- Matched alphaword -> ['xyz']
- Match alphaword at loc 11(1,12)
- Exception raised:Expected alphaword (at char 12), (line:1, col:13)
- Match alphaword at loc 15(1,16)
- Exception raised:Expected alphaword (at char 15), (line:1, col:16)
-
- The output shown is that produced by the default debug actions - custom debug actions can be
- specified using L{setDebugActions}. Prior to attempting
- to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
- is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
- message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
- which makes debugging and exception messages easier to understand - for instance, the default
- name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
- """
- if flag:
- self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
- else:
- self.debug = False
- return self
-
- def __str__( self ):
- return self.name
-
- def __repr__( self ):
- return _ustr(self)
-
- def streamline( self ):
- self.streamlined = True
- self.strRepr = None
- return self
-
- def checkRecursion( self, parseElementList ):
- pass
-
- def validate( self, validateTrace=[] ):
- """
- Check defined expressions for valid structure, check for infinite recursive definitions.
- """
- self.checkRecursion( [] )
-
- def parseFile( self, file_or_filename, parseAll=False ):
- """
- Execute the parse expression on the given file or filename.
- If a filename is specified (instead of a file object),
- the entire file is opened, read, and closed before parsing.
- """
- try:
- file_contents = file_or_filename.read()
- except AttributeError:
- with open(file_or_filename, "r") as f:
- file_contents = f.read()
- try:
- return self.parseString(file_contents, parseAll)
- except ParseBaseException as exc:
- if ParserElement.verbose_stacktrace:
- raise
- else:
- # catch and re-raise exception from here, clears out pyparsing internal stack trace
- raise exc
-
- def __eq__(self,other):
- if isinstance(other, ParserElement):
- return self is other or vars(self) == vars(other)
- elif isinstance(other, basestring):
- return self.matches(other)
- else:
- return super(ParserElement,self)==other
-
- def __ne__(self,other):
- return not (self == other)
-
- def __hash__(self):
- return hash(id(self))
-
- def __req__(self,other):
- return self == other
-
- def __rne__(self,other):
- return not (self == other)
-
- def matches(self, testString, parseAll=True):
- """
- Method for quick testing of a parser against a test string. Good for simple
- inline microtests of sub expressions while building up larger parser.
-
- Parameters:
- - testString - to test against this expression for a match
- - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
-
- Example::
- expr = Word(nums)
- assert expr.matches("100")
- """
- try:
- self.parseString(_ustr(testString), parseAll=parseAll)
- return True
- except ParseBaseException:
- return False
-
- def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
- """
- Execute the parse expression on a series of test strings, showing each
- test, the parsed results or where the parse failed. Quick and easy way to
- run a parse expression against a list of sample strings.
-
- Parameters:
- - tests - a list of separate test strings, or a multiline string of test strings
- - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
- - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
- string; pass None to disable comment filtering
- - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
- if False, only dump nested list
- - printResults - (default=C{True}) prints test output to stdout
- - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
-
- Returns: a (success, results) tuple, where success indicates that all tests succeeded
- (or failed if C{failureTests} is True), and the results contain a list of lines of each
- test's output
-
- Example::
- number_expr = pyparsing_common.number.copy()
-
- result = number_expr.runTests('''
- # unsigned integer
- 100
- # negative integer
- -100
- # float with scientific notation
- 6.02e23
- # integer with scientific notation
- 1e-12
- ''')
- print("Success" if result[0] else "Failed!")
-
- result = number_expr.runTests('''
- # stray character
- 100Z
- # missing leading digit before '.'
- -.100
- # too many '.'
- 3.14.159
- ''', failureTests=True)
- print("Success" if result[0] else "Failed!")
- prints::
- # unsigned integer
- 100
- [100]
-
- # negative integer
- -100
- [-100]
-
- # float with scientific notation
- 6.02e23
- [6.02e+23]
-
- # integer with scientific notation
- 1e-12
- [1e-12]
-
- Success
-
- # stray character
- 100Z
- ^
- FAIL: Expected end of text (at char 3), (line:1, col:4)
-
- # missing leading digit before '.'
- -.100
- ^
- FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
-
- # too many '.'
- 3.14.159
- ^
- FAIL: Expected end of text (at char 4), (line:1, col:5)
-
- Success
-
- Each test string must be on a single line. If you want to test a string that spans multiple
- lines, create a test like this::
-
- expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
-
- (Note that this is a raw string literal, you must include the leading 'r'.)
- """
- if isinstance(tests, basestring):
- tests = list(map(str.strip, tests.rstrip().splitlines()))
- if isinstance(comment, basestring):
- comment = Literal(comment)
- allResults = []
- comments = []
- success = True
- for t in tests:
- if comment is not None and comment.matches(t, False) or comments and not t:
- comments.append(t)
- continue
- if not t:
- continue
- out = ['\n'.join(comments), t]
- comments = []
- try:
- t = t.replace(r'\n','\n')
- result = self.parseString(t, parseAll=parseAll)
- out.append(result.dump(full=fullDump))
- success = success and not failureTests
- except ParseBaseException as pe:
- fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
- if '\n' in t:
- out.append(line(pe.loc, t))
- out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
- else:
- out.append(' '*pe.loc + '^' + fatal)
- out.append("FAIL: " + str(pe))
- success = success and failureTests
- result = pe
- except Exception as exc:
- out.append("FAIL-EXCEPTION: " + str(exc))
- success = success and failureTests
- result = exc
-
- if printResults:
- if fullDump:
- out.append('')
- print('\n'.join(out))
-
- allResults.append((t, result))
-
- return success, allResults
-
-
-class Token(ParserElement):
- """
- Abstract C{ParserElement} subclass, for defining atomic matching patterns.
- """
- def __init__( self ):
- super(Token,self).__init__( savelist=False )
-
-
-class Empty(Token):
- """
- An empty token, will always match.
- """
- def __init__( self ):
- super(Empty,self).__init__()
- self.name = "Empty"
- self.mayReturnEmpty = True
- self.mayIndexError = False
-
-
-class NoMatch(Token):
- """
- A token that will never match.
- """
- def __init__( self ):
- super(NoMatch,self).__init__()
- self.name = "NoMatch"
- self.mayReturnEmpty = True
- self.mayIndexError = False
- self.errmsg = "Unmatchable token"
-
- def parseImpl( self, instring, loc, doActions=True ):
- raise ParseException(instring, loc, self.errmsg, self)
-
-
-class Literal(Token):
- """
- Token to exactly match a specified string.
-
- Example::
- Literal('blah').parseString('blah') # -> ['blah']
- Literal('blah').parseString('blahfooblah') # -> ['blah']
- Literal('blah').parseString('bla') # -> Exception: Expected "blah"
-
- For case-insensitive matching, use L{CaselessLiteral}.
-
- For keyword matching (force word break before and after the matched string),
- use L{Keyword} or L{CaselessKeyword}.
- """
- def __init__( self, matchString ):
- super(Literal,self).__init__()
- self.match = matchString
- self.matchLen = len(matchString)
- try:
- self.firstMatchChar = matchString[0]
- except IndexError:
- warnings.warn("null string passed to Literal; use Empty() instead",
- SyntaxWarning, stacklevel=2)
- self.__class__ = Empty
- self.name = '"%s"' % _ustr(self.match)
- self.errmsg = "Expected " + self.name
- self.mayReturnEmpty = False
- self.mayIndexError = False
-
- # Performance tuning: this routine gets called a *lot*
- # if this is a single character match string and the first character matches,
- # short-circuit as quickly as possible, and avoid calling startswith
- #~ @profile
- def parseImpl( self, instring, loc, doActions=True ):
- if (instring[loc] == self.firstMatchChar and
- (self.matchLen==1 or instring.startswith(self.match,loc)) ):
- return loc+self.matchLen, self.match
- raise ParseException(instring, loc, self.errmsg, self)
-_L = Literal
-ParserElement._literalStringClass = Literal
-
-class Keyword(Token):
- """
- Token to exactly match a specified string as a keyword, that is, it must be
- immediately followed by a non-keyword character. Compare with C{L{Literal}}:
- - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
- - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
- Accepts two optional constructor arguments in addition to the keyword string:
- - C{identChars} is a string of characters that would be valid identifier characters,
- defaulting to all alphanumerics + "_" and "$"
- - C{caseless} allows case-insensitive matching, default is C{False}.
-
- Example::
- Keyword("start").parseString("start") # -> ['start']
- Keyword("start").parseString("starting") # -> Exception
-
- For case-insensitive matching, use L{CaselessKeyword}.
- """
- DEFAULT_KEYWORD_CHARS = alphanums+"_$"
-
- def __init__( self, matchString, identChars=None, caseless=False ):
- super(Keyword,self).__init__()
- if identChars is None:
- identChars = Keyword.DEFAULT_KEYWORD_CHARS
- self.match = matchString
- self.matchLen = len(matchString)
- try:
- self.firstMatchChar = matchString[0]
- except IndexError:
- warnings.warn("null string passed to Keyword; use Empty() instead",
- SyntaxWarning, stacklevel=2)
- self.name = '"%s"' % self.match
- self.errmsg = "Expected " + self.name
- self.mayReturnEmpty = False
- self.mayIndexError = False
- self.caseless = caseless
- if caseless:
- self.caselessmatch = matchString.upper()
- identChars = identChars.upper()
- self.identChars = set(identChars)
-
- def parseImpl( self, instring, loc, doActions=True ):
- if self.caseless:
- if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
- (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
- (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
- return loc+self.matchLen, self.match
- else:
- if (instring[loc] == self.firstMatchChar and
- (self.matchLen==1 or instring.startswith(self.match,loc)) and
- (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
- (loc == 0 or instring[loc-1] not in self.identChars) ):
- return loc+self.matchLen, self.match
- raise ParseException(instring, loc, self.errmsg, self)
-
- def copy(self):
- c = super(Keyword,self).copy()
- c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
- return c
-
- @staticmethod
- def setDefaultKeywordChars( chars ):
- """Overrides the default Keyword chars
- """
- Keyword.DEFAULT_KEYWORD_CHARS = chars
-
-class CaselessLiteral(Literal):
- """
- Token to match a specified string, ignoring case of letters.
- Note: the matched results will always be in the case of the given
- match string, NOT the case of the input text.
-
- Example::
- OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
-
- (Contrast with example for L{CaselessKeyword}.)
- """
- def __init__( self, matchString ):
- super(CaselessLiteral,self).__init__( matchString.upper() )
- # Preserve the defining literal.
- self.returnString = matchString
- self.name = "'%s'" % self.returnString
- self.errmsg = "Expected " + self.name
-
- def parseImpl( self, instring, loc, doActions=True ):
- if instring[ loc:loc+self.matchLen ].upper() == self.match:
- return loc+self.matchLen, self.returnString
- raise ParseException(instring, loc, self.errmsg, self)
-
-class CaselessKeyword(Keyword):
- """
- Caseless version of L{Keyword}.
-
- Example::
- OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
-
- (Contrast with example for L{CaselessLiteral}.)
- """
- def __init__( self, matchString, identChars=None ):
- super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
-
- def parseImpl( self, instring, loc, doActions=True ):
- if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
- (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
- return loc+self.matchLen, self.match
- raise ParseException(instring, loc, self.errmsg, self)
-
-class CloseMatch(Token):
- """
- A variation on L{Literal} which matches "close" matches, that is,
- strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
- - C{match_string} - string to be matched
- - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
-
- The results from a successful parse will contain the matched text from the input string and the following named results:
- - C{mismatches} - a list of the positions within the match_string where mismatches were found
- - C{original} - the original match_string used to compare against the input string
-
- If C{mismatches} is an empty list, then the match was an exact match.
-
- Example::
- patt = CloseMatch("ATCATCGAATGGA")
- patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
- patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
-
- # exact match
- patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
-
- # close match allowing up to 2 mismatches
- patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
- patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
- """
- def __init__(self, match_string, maxMismatches=1):
- super(CloseMatch,self).__init__()
- self.name = match_string
- self.match_string = match_string
- self.maxMismatches = maxMismatches
- self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
- self.mayIndexError = False
- self.mayReturnEmpty = False
-
- def parseImpl( self, instring, loc, doActions=True ):
- start = loc
- instrlen = len(instring)
- maxloc = start + len(self.match_string)
-
- if maxloc <= instrlen:
- match_string = self.match_string
- match_stringloc = 0
- mismatches = []
- maxMismatches = self.maxMismatches
-
- for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
- src,mat = s_m
- if src != mat:
- mismatches.append(match_stringloc)
- if len(mismatches) > maxMismatches:
- break
- else:
- loc = match_stringloc + 1
- results = ParseResults([instring[start:loc]])
- results['original'] = self.match_string
- results['mismatches'] = mismatches
- return loc, results
-
- raise ParseException(instring, loc, self.errmsg, self)
-
-
-class Word(Token):
- """
- Token for matching words composed of allowed character sets.
- Defined with string containing all allowed initial characters,
- an optional string containing allowed body characters (if omitted,
- defaults to the initial character set), and an optional minimum,
- maximum, and/or exact length. The default value for C{min} is 1 (a
- minimum value < 1 is not valid); the default values for C{max} and C{exact}
- are 0, meaning no maximum or exact length restriction. An optional
- C{excludeChars} parameter can list characters that might be found in
- the input C{bodyChars} string; useful to define a word of all printables
- except for one or two characters, for instance.
-
- L{srange} is useful for defining custom character set strings for defining
- C{Word} expressions, using range notation from regular expression character sets.
-
- A common mistake is to use C{Word} to match a specific literal string, as in
- C{Word("Address")}. Remember that C{Word} uses the string argument to define
- I{sets} of matchable characters. This expression would match "Add", "AAA",
- "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
- To match an exact literal string, use L{Literal} or L{Keyword}.
-
- pyparsing includes helper strings for building Words:
- - L{alphas}
- - L{nums}
- - L{alphanums}
- - L{hexnums}
- - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
- - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
- - L{printables} (any non-whitespace character)
-
- Example::
- # a word composed of digits
- integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
-
- # a word with a leading capital, and zero or more lowercase
- capital_word = Word(alphas.upper(), alphas.lower())
-
- # hostnames are alphanumeric, with leading alpha, and '-'
- hostname = Word(alphas, alphanums+'-')
-
- # roman numeral (not a strict parser, accepts invalid mix of characters)
- roman = Word("IVXLCDM")
-
- # any string of non-whitespace characters, except for ','
- csv_value = Word(printables, excludeChars=",")
- """
- def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
- super(Word,self).__init__()
- if excludeChars:
- initChars = ''.join(c for c in initChars if c not in excludeChars)
- if bodyChars:
- bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
- self.initCharsOrig = initChars
- self.initChars = set(initChars)
- if bodyChars :
- self.bodyCharsOrig = bodyChars
- self.bodyChars = set(bodyChars)
- else:
- self.bodyCharsOrig = initChars
- self.bodyChars = set(initChars)
-
- self.maxSpecified = max > 0
-
- if min < 1:
- raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
-
- self.minLen = min
-
- if max > 0:
- self.maxLen = max
- else:
- self.maxLen = _MAX_INT
-
- if exact > 0:
- self.maxLen = exact
- self.minLen = exact
-
- self.name = _ustr(self)
- self.errmsg = "Expected " + self.name
- self.mayIndexError = False
- self.asKeyword = asKeyword
-
- if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
- if self.bodyCharsOrig == self.initCharsOrig:
- self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
- elif len(self.initCharsOrig) == 1:
- self.reString = "%s[%s]*" % \
- (re.escape(self.initCharsOrig),
- _escapeRegexRangeChars(self.bodyCharsOrig),)
- else:
- self.reString = "[%s][%s]*" % \
- (_escapeRegexRangeChars(self.initCharsOrig),
- _escapeRegexRangeChars(self.bodyCharsOrig),)
- if self.asKeyword:
- self.reString = r"\b"+self.reString+r"\b"
- try:
- self.re = re.compile( self.reString )
- except Exception:
- self.re = None
-
- def parseImpl( self, instring, loc, doActions=True ):
- if self.re:
- result = self.re.match(instring,loc)
- if not result:
- raise ParseException(instring, loc, self.errmsg, self)
-
- loc = result.end()
- return loc, result.group()
-
- if not(instring[ loc ] in self.initChars):
- raise ParseException(instring, loc, self.errmsg, self)
-
- start = loc
- loc += 1
- instrlen = len(instring)
- bodychars = self.bodyChars
- maxloc = start + self.maxLen
- maxloc = min( maxloc, instrlen )
- while loc < maxloc and instring[loc] in bodychars:
- loc += 1
-
- throwException = False
- if loc - start < self.minLen:
- throwException = True
- if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
- throwException = True
- if self.asKeyword:
- if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
- throwException = True
-
- if throwException:
- raise ParseException(instring, loc, self.errmsg, self)
-
- return loc, instring[start:loc]
-
- def __str__( self ):
- try:
- return super(Word,self).__str__()
- except Exception:
- pass
-
-
- if self.strRepr is None:
-
- def charsAsStr(s):
- if len(s)>4:
- return s[:4]+"..."
- else:
- return s
-
- if ( self.initCharsOrig != self.bodyCharsOrig ):
- self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
- else:
- self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
-
- return self.strRepr
-
-
-class Regex(Token):
- r"""
- Token for matching strings that match a given regular expression.
- Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
- If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
- named parse results.
-
- Example::
- realnum = Regex(r"[+-]?\d+\.\d*")
- date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
- # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
- roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
- """
- compiledREtype = type(re.compile("[A-Z]"))
- def __init__( self, pattern, flags=0):
- """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
- super(Regex,self).__init__()
-
- if isinstance(pattern, basestring):
- if not pattern:
- warnings.warn("null string passed to Regex; use Empty() instead",
- SyntaxWarning, stacklevel=2)
-
- self.pattern = pattern
- self.flags = flags
-
- try:
- self.re = re.compile(self.pattern, self.flags)
- self.reString = self.pattern
- except sre_constants.error:
- warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
- SyntaxWarning, stacklevel=2)
- raise
-
- elif isinstance(pattern, Regex.compiledREtype):
- self.re = pattern
- self.pattern = \
- self.reString = str(pattern)
- self.flags = flags
-
- else:
- raise ValueError("Regex may only be constructed with a string or a compiled RE object")
-
- self.name = _ustr(self)
- self.errmsg = "Expected " + self.name
- self.mayIndexError = False
- self.mayReturnEmpty = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- result = self.re.match(instring,loc)
- if not result:
- raise ParseException(instring, loc, self.errmsg, self)
-
- loc = result.end()
- d = result.groupdict()
- ret = ParseResults(result.group())
- if d:
- for k in d:
- ret[k] = d[k]
- return loc,ret
-
- def __str__( self ):
- try:
- return super(Regex,self).__str__()
- except Exception:
- pass
-
- if self.strRepr is None:
- self.strRepr = "Re:(%s)" % repr(self.pattern)
-
- return self.strRepr
-
-
-class QuotedString(Token):
- r"""
- Token for matching strings that are delimited by quoting characters.
-
- Defined with the following parameters:
- - quoteChar - string of one or more characters defining the quote delimiting string
- - escChar - character to escape quotes, typically backslash (default=C{None})
- - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
- - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
- - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
- - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
- - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
-
- Example::
- qs = QuotedString('"')
- print(qs.searchString('lsjdf "This is the quote" sldjf'))
- complex_qs = QuotedString('{{', endQuoteChar='}}')
- print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
- sql_qs = QuotedString('"', escQuote='""')
- print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
- prints::
- [['This is the quote']]
- [['This is the "quote"']]
- [['This is the quote with "embedded" quotes']]
- """
- def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
- super(QuotedString,self).__init__()
-
- # remove white space from quote chars - wont work anyway
- quoteChar = quoteChar.strip()
- if not quoteChar:
- warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
- raise SyntaxError()
-
- if endQuoteChar is None:
- endQuoteChar = quoteChar
- else:
- endQuoteChar = endQuoteChar.strip()
- if not endQuoteChar:
- warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
- raise SyntaxError()
-
- self.quoteChar = quoteChar
- self.quoteCharLen = len(quoteChar)
- self.firstQuoteChar = quoteChar[0]
- self.endQuoteChar = endQuoteChar
- self.endQuoteCharLen = len(endQuoteChar)
- self.escChar = escChar
- self.escQuote = escQuote
- self.unquoteResults = unquoteResults
- self.convertWhitespaceEscapes = convertWhitespaceEscapes
-
- if multiline:
- self.flags = re.MULTILINE | re.DOTALL
- self.pattern = r'%s(?:[^%s%s]' % \
- ( re.escape(self.quoteChar),
- _escapeRegexRangeChars(self.endQuoteChar[0]),
- (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
- else:
- self.flags = 0
- self.pattern = r'%s(?:[^%s\n\r%s]' % \
- ( re.escape(self.quoteChar),
- _escapeRegexRangeChars(self.endQuoteChar[0]),
- (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
- if len(self.endQuoteChar) > 1:
- self.pattern += (
- '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
- _escapeRegexRangeChars(self.endQuoteChar[i]))
- for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
- )
- if escQuote:
- self.pattern += (r'|(?:%s)' % re.escape(escQuote))
- if escChar:
- self.pattern += (r'|(?:%s.)' % re.escape(escChar))
- self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
- self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
-
- try:
- self.re = re.compile(self.pattern, self.flags)
- self.reString = self.pattern
- except sre_constants.error:
- warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
- SyntaxWarning, stacklevel=2)
- raise
-
- self.name = _ustr(self)
- self.errmsg = "Expected " + self.name
- self.mayIndexError = False
- self.mayReturnEmpty = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
- if not result:
- raise ParseException(instring, loc, self.errmsg, self)
-
- loc = result.end()
- ret = result.group()
-
- if self.unquoteResults:
-
- # strip off quotes
- ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
-
- if isinstance(ret,basestring):
- # replace escaped whitespace
- if '\\' in ret and self.convertWhitespaceEscapes:
- ws_map = {
- r'\t' : '\t',
- r'\n' : '\n',
- r'\f' : '\f',
- r'\r' : '\r',
- }
- for wslit,wschar in ws_map.items():
- ret = ret.replace(wslit, wschar)
-
- # replace escaped characters
- if self.escChar:
- ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
-
- # replace escaped quotes
- if self.escQuote:
- ret = ret.replace(self.escQuote, self.endQuoteChar)
-
- return loc, ret
-
- def __str__( self ):
- try:
- return super(QuotedString,self).__str__()
- except Exception:
- pass
-
- if self.strRepr is None:
- self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
-
- return self.strRepr
-
-
-class CharsNotIn(Token):
- """
- Token for matching words composed of characters I{not} in a given set (will
- include whitespace in matched characters if not listed in the provided exclusion set - see example).
- Defined with string containing all disallowed characters, and an optional
- minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
- minimum value < 1 is not valid); the default values for C{max} and C{exact}
- are 0, meaning no maximum or exact length restriction.
-
- Example::
- # define a comma-separated-value as anything that is not a ','
- csv_value = CharsNotIn(',')
- print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
- prints::
- ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
- """
- def __init__( self, notChars, min=1, max=0, exact=0 ):
- super(CharsNotIn,self).__init__()
- self.skipWhitespace = False
- self.notChars = notChars
-
- if min < 1:
- raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
-
- self.minLen = min
-
- if max > 0:
- self.maxLen = max
- else:
- self.maxLen = _MAX_INT
-
- if exact > 0:
- self.maxLen = exact
- self.minLen = exact
-
- self.name = _ustr(self)
- self.errmsg = "Expected " + self.name
- self.mayReturnEmpty = ( self.minLen == 0 )
- self.mayIndexError = False
-
- def parseImpl( self, instring, loc, doActions=True ):
- if instring[loc] in self.notChars:
- raise ParseException(instring, loc, self.errmsg, self)
-
- start = loc
- loc += 1
- notchars = self.notChars
- maxlen = min( start+self.maxLen, len(instring) )
- while loc < maxlen and \
- (instring[loc] not in notchars):
- loc += 1
-
- if loc - start < self.minLen:
- raise ParseException(instring, loc, self.errmsg, self)
-
- return loc, instring[start:loc]
-
- def __str__( self ):
- try:
- return super(CharsNotIn, self).__str__()
- except Exception:
- pass
-
- if self.strRepr is None:
- if len(self.notChars) > 4:
- self.strRepr = "!W:(%s...)" % self.notChars[:4]
- else:
- self.strRepr = "!W:(%s)" % self.notChars
-
- return self.strRepr
-
-class White(Token):
- """
- Special matching class for matching whitespace. Normally, whitespace is ignored
- by pyparsing grammars. This class is included when some whitespace structures
- are significant. Define with a string containing the whitespace characters to be
- matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
- as defined for the C{L{Word}} class.
- """
- whiteStrs = {
- " " : "<SPC>",
- "\t": "<TAB>",
- "\n": "<LF>",
- "\r": "<CR>",
- "\f": "<FF>",
- }
- def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
- super(White,self).__init__()
- self.matchWhite = ws
- self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
- #~ self.leaveWhitespace()
- self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
- self.mayReturnEmpty = True
- self.errmsg = "Expected " + self.name
-
- self.minLen = min
-
- if max > 0:
- self.maxLen = max
- else:
- self.maxLen = _MAX_INT
-
- if exact > 0:
- self.maxLen = exact
- self.minLen = exact
-
- def parseImpl( self, instring, loc, doActions=True ):
- if not(instring[ loc ] in self.matchWhite):
- raise ParseException(instring, loc, self.errmsg, self)
- start = loc
- loc += 1
- maxloc = start + self.maxLen
- maxloc = min( maxloc, len(instring) )
- while loc < maxloc and instring[loc] in self.matchWhite:
- loc += 1
-
- if loc - start < self.minLen:
- raise ParseException(instring, loc, self.errmsg, self)
-
- return loc, instring[start:loc]
-
-
-class _PositionToken(Token):
- def __init__( self ):
- super(_PositionToken,self).__init__()
- self.name=self.__class__.__name__
- self.mayReturnEmpty = True
- self.mayIndexError = False
-
-class GoToColumn(_PositionToken):
- """
- Token to advance to a specific column of input text; useful for tabular report scraping.
- """
- def __init__( self, colno ):
- super(GoToColumn,self).__init__()
- self.col = colno
-
- def preParse( self, instring, loc ):
- if col(loc,instring) != self.col:
- instrlen = len(instring)
- if self.ignoreExprs:
- loc = self._skipIgnorables( instring, loc )
- while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
- loc += 1
- return loc
-
- def parseImpl( self, instring, loc, doActions=True ):
- thiscol = col( loc, instring )
- if thiscol > self.col:
- raise ParseException( instring, loc, "Text not in expected column", self )
- newloc = loc + self.col - thiscol
- ret = instring[ loc: newloc ]
- return newloc, ret
-
-
-class LineStart(_PositionToken):
- """
- Matches if current position is at the beginning of a line within the parse string
-
- Example::
-
- test = '''\
- AAA this line
- AAA and this line
- AAA but not this one
- B AAA and definitely not this one
- '''
-
- for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
- print(t)
-
- Prints::
- ['AAA', ' this line']
- ['AAA', ' and this line']
-
- """
- def __init__( self ):
- super(LineStart,self).__init__()
- self.errmsg = "Expected start of line"
-
- def parseImpl( self, instring, loc, doActions=True ):
- if col(loc, instring) == 1:
- return loc, []
- raise ParseException(instring, loc, self.errmsg, self)
-
-class LineEnd(_PositionToken):
- """
- Matches if current position is at the end of a line within the parse string
- """
- def __init__( self ):
- super(LineEnd,self).__init__()
- self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
- self.errmsg = "Expected end of line"
-
- def parseImpl( self, instring, loc, doActions=True ):
- if loc<len(instring):
- if instring[loc] == "\n":
- return loc+1, "\n"
- else:
- raise ParseException(instring, loc, self.errmsg, self)
- elif loc == len(instring):
- return loc+1, []
- else:
- raise ParseException(instring, loc, self.errmsg, self)
-
-class StringStart(_PositionToken):
- """
- Matches if current position is at the beginning of the parse string
- """
- def __init__( self ):
- super(StringStart,self).__init__()
- self.errmsg = "Expected start of text"
-
- def parseImpl( self, instring, loc, doActions=True ):
- if loc != 0:
- # see if entire string up to here is just whitespace and ignoreables
- if loc != self.preParse( instring, 0 ):
- raise ParseException(instring, loc, self.errmsg, self)
- return loc, []
-
-class StringEnd(_PositionToken):
- """
- Matches if current position is at the end of the parse string
- """
- def __init__( self ):
- super(StringEnd,self).__init__()
- self.errmsg = "Expected end of text"
-
- def parseImpl( self, instring, loc, doActions=True ):
- if loc < len(instring):
- raise ParseException(instring, loc, self.errmsg, self)
- elif loc == len(instring):
- return loc+1, []
- elif loc > len(instring):
- return loc, []
- else:
- raise ParseException(instring, loc, self.errmsg, self)
-
-class WordStart(_PositionToken):
- """
- Matches if the current position is at the beginning of a Word, and
- is not preceded by any character in a given set of C{wordChars}
- (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
- use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
- the string being parsed, or at the beginning of a line.
- """
- def __init__(self, wordChars = printables):
- super(WordStart,self).__init__()
- self.wordChars = set(wordChars)
- self.errmsg = "Not at the start of a word"
-
- def parseImpl(self, instring, loc, doActions=True ):
- if loc != 0:
- if (instring[loc-1] in self.wordChars or
- instring[loc] not in self.wordChars):
- raise ParseException(instring, loc, self.errmsg, self)
- return loc, []
-
-class WordEnd(_PositionToken):
- """
- Matches if the current position is at the end of a Word, and
- is not followed by any character in a given set of C{wordChars}
- (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
- use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
- the string being parsed, or at the end of a line.
- """
- def __init__(self, wordChars = printables):
- super(WordEnd,self).__init__()
- self.wordChars = set(wordChars)
- self.skipWhitespace = False
- self.errmsg = "Not at the end of a word"
-
- def parseImpl(self, instring, loc, doActions=True ):
- instrlen = len(instring)
- if instrlen>0 and loc<instrlen:
- if (instring[loc] in self.wordChars or
- instring[loc-1] not in self.wordChars):
- raise ParseException(instring, loc, self.errmsg, self)
- return loc, []
-
-
-class ParseExpression(ParserElement):
- """
- Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
- """
- def __init__( self, exprs, savelist = False ):
- super(ParseExpression,self).__init__(savelist)
- if isinstance( exprs, _generatorType ):
- exprs = list(exprs)
-
- if isinstance( exprs, basestring ):
- self.exprs = [ ParserElement._literalStringClass( exprs ) ]
- elif isinstance( exprs, Iterable ):
- exprs = list(exprs)
- # if sequence of strings provided, wrap with Literal
- if all(isinstance(expr, basestring) for expr in exprs):
- exprs = map(ParserElement._literalStringClass, exprs)
- self.exprs = list(exprs)
- else:
- try:
- self.exprs = list( exprs )
- except TypeError:
- self.exprs = [ exprs ]
- self.callPreparse = False
-
- def __getitem__( self, i ):
- return self.exprs[i]
-
- def append( self, other ):
- self.exprs.append( other )
- self.strRepr = None
- return self
-
- def leaveWhitespace( self ):
- """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
- all contained expressions."""
- self.skipWhitespace = False
- self.exprs = [ e.copy() for e in self.exprs ]
- for e in self.exprs:
- e.leaveWhitespace()
- return self
-
- def ignore( self, other ):
- if isinstance( other, Suppress ):
- if other not in self.ignoreExprs:
- super( ParseExpression, self).ignore( other )
- for e in self.exprs:
- e.ignore( self.ignoreExprs[-1] )
- else:
- super( ParseExpression, self).ignore( other )
- for e in self.exprs:
- e.ignore( self.ignoreExprs[-1] )
- return self
-
- def __str__( self ):
- try:
- return super(ParseExpression,self).__str__()
- except Exception:
- pass
-
- if self.strRepr is None:
- self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
- return self.strRepr
-
- def streamline( self ):
- super(ParseExpression,self).streamline()
-
- for e in self.exprs:
- e.streamline()
-
- # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
- # but only if there are no parse actions or resultsNames on the nested And's
- # (likewise for Or's and MatchFirst's)
- if ( len(self.exprs) == 2 ):
- other = self.exprs[0]
- if ( isinstance( other, self.__class__ ) and
- not(other.parseAction) and
- other.resultsName is None and
- not other.debug ):
- self.exprs = other.exprs[:] + [ self.exprs[1] ]
- self.strRepr = None
- self.mayReturnEmpty |= other.mayReturnEmpty
- self.mayIndexError |= other.mayIndexError
-
- other = self.exprs[-1]
- if ( isinstance( other, self.__class__ ) and
- not(other.parseAction) and
- other.resultsName is None and
- not other.debug ):
- self.exprs = self.exprs[:-1] + other.exprs[:]
- self.strRepr = None
- self.mayReturnEmpty |= other.mayReturnEmpty
- self.mayIndexError |= other.mayIndexError
-
- self.errmsg = "Expected " + _ustr(self)
-
- return self
-
- def setResultsName( self, name, listAllMatches=False ):
- ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
- return ret
-
- def validate( self, validateTrace=[] ):
- tmp = validateTrace[:]+[self]
- for e in self.exprs:
- e.validate(tmp)
- self.checkRecursion( [] )
-
- def copy(self):
- ret = super(ParseExpression,self).copy()
- ret.exprs = [e.copy() for e in self.exprs]
- return ret
-
-class And(ParseExpression):
- """
- Requires all given C{ParseExpression}s to be found in the given order.
- Expressions may be separated by whitespace.
- May be constructed using the C{'+'} operator.
- May also be constructed using the C{'-'} operator, which will suppress backtracking.
-
- Example::
- integer = Word(nums)
- name_expr = OneOrMore(Word(alphas))
-
- expr = And([integer("id"),name_expr("name"),integer("age")])
- # more easily written as:
- expr = integer("id") + name_expr("name") + integer("age")
- """
-
- class _ErrorStop(Empty):
- def __init__(self, *args, **kwargs):
- super(And._ErrorStop,self).__init__(*args, **kwargs)
- self.name = '-'
- self.leaveWhitespace()
-
- def __init__( self, exprs, savelist = True ):
- super(And,self).__init__(exprs, savelist)
- self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
- self.setWhitespaceChars( self.exprs[0].whiteChars )
- self.skipWhitespace = self.exprs[0].skipWhitespace
- self.callPreparse = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- # pass False as last arg to _parse for first element, since we already
- # pre-parsed the string as part of our And pre-parsing
- loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
- errorStop = False
- for e in self.exprs[1:]:
- if isinstance(e, And._ErrorStop):
- errorStop = True
- continue
- if errorStop:
- try:
- loc, exprtokens = e._parse( instring, loc, doActions )
- except ParseSyntaxException:
- raise
- except ParseBaseException as pe:
- pe.__traceback__ = None
- raise ParseSyntaxException._from_exception(pe)
- except IndexError:
- raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
- else:
- loc, exprtokens = e._parse( instring, loc, doActions )
- if exprtokens or exprtokens.haskeys():
- resultlist += exprtokens
- return loc, resultlist
-
- def __iadd__(self, other ):
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- return self.append( other ) #And( [ self, other ] )
-
- def checkRecursion( self, parseElementList ):
- subRecCheckList = parseElementList[:] + [ self ]
- for e in self.exprs:
- e.checkRecursion( subRecCheckList )
- if not e.mayReturnEmpty:
- break
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
-
- return self.strRepr
-
-
-class Or(ParseExpression):
- """
- Requires that at least one C{ParseExpression} is found.
- If two expressions match, the expression that matches the longest string will be used.
- May be constructed using the C{'^'} operator.
-
- Example::
- # construct Or using '^' operator
-
- number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
- print(number.searchString("123 3.1416 789"))
- prints::
- [['123'], ['3.1416'], ['789']]
- """
- def __init__( self, exprs, savelist = False ):
- super(Or,self).__init__(exprs, savelist)
- if self.exprs:
- self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
- else:
- self.mayReturnEmpty = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- maxExcLoc = -1
- maxException = None
- matches = []
- for e in self.exprs:
- try:
- loc2 = e.tryParse( instring, loc )
- except ParseException as err:
- err.__traceback__ = None
- if err.loc > maxExcLoc:
- maxException = err
- maxExcLoc = err.loc
- except IndexError:
- if len(instring) > maxExcLoc:
- maxException = ParseException(instring,len(instring),e.errmsg,self)
- maxExcLoc = len(instring)
- else:
- # save match among all matches, to retry longest to shortest
- matches.append((loc2, e))
-
- if matches:
- matches.sort(key=lambda x: -x[0])
- for _,e in matches:
- try:
- return e._parse( instring, loc, doActions )
- except ParseException as err:
- err.__traceback__ = None
- if err.loc > maxExcLoc:
- maxException = err
- maxExcLoc = err.loc
-
- if maxException is not None:
- maxException.msg = self.errmsg
- raise maxException
- else:
- raise ParseException(instring, loc, "no defined alternatives to match", self)
-
-
- def __ixor__(self, other ):
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- return self.append( other ) #Or( [ self, other ] )
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
-
- return self.strRepr
-
- def checkRecursion( self, parseElementList ):
- subRecCheckList = parseElementList[:] + [ self ]
- for e in self.exprs:
- e.checkRecursion( subRecCheckList )
-
-
-class MatchFirst(ParseExpression):
- """
- Requires that at least one C{ParseExpression} is found.
- If two expressions match, the first one listed is the one that will match.
- May be constructed using the C{'|'} operator.
-
- Example::
- # construct MatchFirst using '|' operator
-
- # watch the order of expressions to match
- number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
- print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
-
- # put more selective expression first
- number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
- print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
- """
- def __init__( self, exprs, savelist = False ):
- super(MatchFirst,self).__init__(exprs, savelist)
- if self.exprs:
- self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
- else:
- self.mayReturnEmpty = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- maxExcLoc = -1
- maxException = None
- for e in self.exprs:
- try:
- ret = e._parse( instring, loc, doActions )
- return ret
- except ParseException as err:
- if err.loc > maxExcLoc:
- maxException = err
- maxExcLoc = err.loc
- except IndexError:
- if len(instring) > maxExcLoc:
- maxException = ParseException(instring,len(instring),e.errmsg,self)
- maxExcLoc = len(instring)
-
- # only got here if no expression matched, raise exception for match that made it the furthest
- else:
- if maxException is not None:
- maxException.msg = self.errmsg
- raise maxException
- else:
- raise ParseException(instring, loc, "no defined alternatives to match", self)
-
- def __ior__(self, other ):
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass( other )
- return self.append( other ) #MatchFirst( [ self, other ] )
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
-
- return self.strRepr
-
- def checkRecursion( self, parseElementList ):
- subRecCheckList = parseElementList[:] + [ self ]
- for e in self.exprs:
- e.checkRecursion( subRecCheckList )
-
-
-class Each(ParseExpression):
- """
- Requires all given C{ParseExpression}s to be found, but in any order.
- Expressions may be separated by whitespace.
- May be constructed using the C{'&'} operator.
-
- Example::
- color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
- shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
- integer = Word(nums)
- shape_attr = "shape:" + shape_type("shape")
- posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
- color_attr = "color:" + color("color")
- size_attr = "size:" + integer("size")
-
- # use Each (using operator '&') to accept attributes in any order
- # (shape and posn are required, color and size are optional)
- shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
-
- shape_spec.runTests('''
- shape: SQUARE color: BLACK posn: 100, 120
- shape: CIRCLE size: 50 color: BLUE posn: 50,80
- color:GREEN size:20 shape:TRIANGLE posn:20,40
- '''
- )
- prints::
- shape: SQUARE color: BLACK posn: 100, 120
- ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
- - color: BLACK
- - posn: ['100', ',', '120']
- - x: 100
- - y: 120
- - shape: SQUARE
-
-
- shape: CIRCLE size: 50 color: BLUE posn: 50,80
- ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
- - color: BLUE
- - posn: ['50', ',', '80']
- - x: 50
- - y: 80
- - shape: CIRCLE
- - size: 50
-
-
- color: GREEN size: 20 shape: TRIANGLE posn: 20,40
- ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
- - color: GREEN
- - posn: ['20', ',', '40']
- - x: 20
- - y: 40
- - shape: TRIANGLE
- - size: 20
- """
- def __init__( self, exprs, savelist = True ):
- super(Each,self).__init__(exprs, savelist)
- self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
- self.skipWhitespace = True
- self.initExprGroups = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- if self.initExprGroups:
- self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
- opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
- opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
- self.optionals = opt1 + opt2
- self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
- self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
- self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
- self.required += self.multirequired
- self.initExprGroups = False
- tmpLoc = loc
- tmpReqd = self.required[:]
- tmpOpt = self.optionals[:]
- matchOrder = []
-
- keepMatching = True
- while keepMatching:
- tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
- failed = []
- for e in tmpExprs:
- try:
- tmpLoc = e.tryParse( instring, tmpLoc )
- except ParseException:
- failed.append(e)
- else:
- matchOrder.append(self.opt1map.get(id(e),e))
- if e in tmpReqd:
- tmpReqd.remove(e)
- elif e in tmpOpt:
- tmpOpt.remove(e)
- if len(failed) == len(tmpExprs):
- keepMatching = False
-
- if tmpReqd:
- missing = ", ".join(_ustr(e) for e in tmpReqd)
- raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
-
- # add any unmatched Optionals, in case they have default values defined
- matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
-
- resultlist = []
- for e in matchOrder:
- loc,results = e._parse(instring,loc,doActions)
- resultlist.append(results)
-
- finalResults = sum(resultlist, ParseResults([]))
- return loc, finalResults
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
-
- return self.strRepr
-
- def checkRecursion( self, parseElementList ):
- subRecCheckList = parseElementList[:] + [ self ]
- for e in self.exprs:
- e.checkRecursion( subRecCheckList )
-
-
-class ParseElementEnhance(ParserElement):
- """
- Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
- """
- def __init__( self, expr, savelist=False ):
- super(ParseElementEnhance,self).__init__(savelist)
- if isinstance( expr, basestring ):
- if issubclass(ParserElement._literalStringClass, Token):
- expr = ParserElement._literalStringClass(expr)
- else:
- expr = ParserElement._literalStringClass(Literal(expr))
- self.expr = expr
- self.strRepr = None
- if expr is not None:
- self.mayIndexError = expr.mayIndexError
- self.mayReturnEmpty = expr.mayReturnEmpty
- self.setWhitespaceChars( expr.whiteChars )
- self.skipWhitespace = expr.skipWhitespace
- self.saveAsList = expr.saveAsList
- self.callPreparse = expr.callPreparse
- self.ignoreExprs.extend(expr.ignoreExprs)
-
- def parseImpl( self, instring, loc, doActions=True ):
- if self.expr is not None:
- return self.expr._parse( instring, loc, doActions, callPreParse=False )
- else:
- raise ParseException("",loc,self.errmsg,self)
-
- def leaveWhitespace( self ):
- self.skipWhitespace = False
- self.expr = self.expr.copy()
- if self.expr is not None:
- self.expr.leaveWhitespace()
- return self
-
- def ignore( self, other ):
- if isinstance( other, Suppress ):
- if other not in self.ignoreExprs:
- super( ParseElementEnhance, self).ignore( other )
- if self.expr is not None:
- self.expr.ignore( self.ignoreExprs[-1] )
- else:
- super( ParseElementEnhance, self).ignore( other )
- if self.expr is not None:
- self.expr.ignore( self.ignoreExprs[-1] )
- return self
-
- def streamline( self ):
- super(ParseElementEnhance,self).streamline()
- if self.expr is not None:
- self.expr.streamline()
- return self
-
- def checkRecursion( self, parseElementList ):
- if self in parseElementList:
- raise RecursiveGrammarException( parseElementList+[self] )
- subRecCheckList = parseElementList[:] + [ self ]
- if self.expr is not None:
- self.expr.checkRecursion( subRecCheckList )
-
- def validate( self, validateTrace=[] ):
- tmp = validateTrace[:]+[self]
- if self.expr is not None:
- self.expr.validate(tmp)
- self.checkRecursion( [] )
-
- def __str__( self ):
- try:
- return super(ParseElementEnhance,self).__str__()
- except Exception:
- pass
-
- if self.strRepr is None and self.expr is not None:
- self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
- return self.strRepr
-
-
-class FollowedBy(ParseElementEnhance):
- """
- Lookahead matching of the given parse expression. C{FollowedBy}
- does I{not} advance the parsing position within the input string, it only
- verifies that the specified parse expression matches at the current
- position. C{FollowedBy} always returns a null token list.
-
- Example::
- # use FollowedBy to match a label only if it is followed by a ':'
- data_word = Word(alphas)
- label = data_word + FollowedBy(':')
- attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
-
- OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
- prints::
- [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
- """
- def __init__( self, expr ):
- super(FollowedBy,self).__init__(expr)
- self.mayReturnEmpty = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- self.expr.tryParse( instring, loc )
- return loc, []
-
-
-class NotAny(ParseElementEnhance):
- """
- Lookahead to disallow matching with the given parse expression. C{NotAny}
- does I{not} advance the parsing position within the input string, it only
- verifies that the specified parse expression does I{not} match at the current
- position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
- always returns a null token list. May be constructed using the '~' operator.
-
- Example::
-
- """
- def __init__( self, expr ):
- super(NotAny,self).__init__(expr)
- #~ self.leaveWhitespace()
- self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
- self.mayReturnEmpty = True
- self.errmsg = "Found unwanted token, "+_ustr(self.expr)
-
- def parseImpl( self, instring, loc, doActions=True ):
- if self.expr.canParseNext(instring, loc):
- raise ParseException(instring, loc, self.errmsg, self)
- return loc, []
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "~{" + _ustr(self.expr) + "}"
-
- return self.strRepr
-
-class _MultipleMatch(ParseElementEnhance):
- def __init__( self, expr, stopOn=None):
- super(_MultipleMatch, self).__init__(expr)
- self.saveAsList = True
- ender = stopOn
- if isinstance(ender, basestring):
- ender = ParserElement._literalStringClass(ender)
- self.not_ender = ~ender if ender is not None else None
-
- def parseImpl( self, instring, loc, doActions=True ):
- self_expr_parse = self.expr._parse
- self_skip_ignorables = self._skipIgnorables
- check_ender = self.not_ender is not None
- if check_ender:
- try_not_ender = self.not_ender.tryParse
-
- # must be at least one (but first see if we are the stopOn sentinel;
- # if so, fail)
- if check_ender:
- try_not_ender(instring, loc)
- loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
- try:
- hasIgnoreExprs = (not not self.ignoreExprs)
- while 1:
- if check_ender:
- try_not_ender(instring, loc)
- if hasIgnoreExprs:
- preloc = self_skip_ignorables( instring, loc )
- else:
- preloc = loc
- loc, tmptokens = self_expr_parse( instring, preloc, doActions )
- if tmptokens or tmptokens.haskeys():
- tokens += tmptokens
- except (ParseException,IndexError):
- pass
-
- return loc, tokens
-
-class OneOrMore(_MultipleMatch):
- """
- Repetition of one or more of the given expression.
-
- Parameters:
- - expr - expression that must match one or more times
- - stopOn - (default=C{None}) - expression for a terminating sentinel
- (only required if the sentinel would ordinarily match the repetition
- expression)
-
- Example::
- data_word = Word(alphas)
- label = data_word + FollowedBy(':')
- attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
-
- text = "shape: SQUARE posn: upper left color: BLACK"
- OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
-
- # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
- attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
- OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
-
- # could also be written as
- (attr_expr * (1,)).parseString(text).pprint()
- """
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "{" + _ustr(self.expr) + "}..."
-
- return self.strRepr
-
-class ZeroOrMore(_MultipleMatch):
- """
- Optional repetition of zero or more of the given expression.
-
- Parameters:
- - expr - expression that must match zero or more times
- - stopOn - (default=C{None}) - expression for a terminating sentinel
- (only required if the sentinel would ordinarily match the repetition
- expression)
-
- Example: similar to L{OneOrMore}
- """
- def __init__( self, expr, stopOn=None):
- super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
- self.mayReturnEmpty = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- try:
- return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
- except (ParseException,IndexError):
- return loc, []
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "[" + _ustr(self.expr) + "]..."
-
- return self.strRepr
-
-class _NullToken(object):
- def __bool__(self):
- return False
- __nonzero__ = __bool__
- def __str__(self):
- return ""
-
-_optionalNotMatched = _NullToken()
-class Optional(ParseElementEnhance):
- """
- Optional matching of the given expression.
-
- Parameters:
- - expr - expression that must match zero or more times
- - default (optional) - value to be returned if the optional expression is not found.
-
- Example::
- # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
- zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
- zip.runTests('''
- # traditional ZIP code
- 12345
-
- # ZIP+4 form
- 12101-0001
-
- # invalid ZIP
- 98765-
- ''')
- prints::
- # traditional ZIP code
- 12345
- ['12345']
-
- # ZIP+4 form
- 12101-0001
- ['12101-0001']
-
- # invalid ZIP
- 98765-
- ^
- FAIL: Expected end of text (at char 5), (line:1, col:6)
- """
- def __init__( self, expr, default=_optionalNotMatched ):
- super(Optional,self).__init__( expr, savelist=False )
- self.saveAsList = self.expr.saveAsList
- self.defaultValue = default
- self.mayReturnEmpty = True
-
- def parseImpl( self, instring, loc, doActions=True ):
- try:
- loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
- except (ParseException,IndexError):
- if self.defaultValue is not _optionalNotMatched:
- if self.expr.resultsName:
- tokens = ParseResults([ self.defaultValue ])
- tokens[self.expr.resultsName] = self.defaultValue
- else:
- tokens = [ self.defaultValue ]
- else:
- tokens = []
- return loc, tokens
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
-
- if self.strRepr is None:
- self.strRepr = "[" + _ustr(self.expr) + "]"
-
- return self.strRepr
-
-class SkipTo(ParseElementEnhance):
- """
- Token for skipping over all undefined text until the matched expression is found.
-
- Parameters:
- - expr - target expression marking the end of the data to be skipped
- - include - (default=C{False}) if True, the target expression is also parsed
- (the skipped text and target expression are returned as a 2-element list).
- - ignore - (default=C{None}) used to define grammars (typically quoted strings and
- comments) that might contain false matches to the target expression
- - failOn - (default=C{None}) define expressions that are not allowed to be
- included in the skipped test; if found before the target expression is found,
- the SkipTo is not a match
-
- Example::
- report = '''
- Outstanding Issues Report - 1 Jan 2000
-
- # | Severity | Description | Days Open
- -----+----------+-------------------------------------------+-----------
- 101 | Critical | Intermittent system crash | 6
- 94 | Cosmetic | Spelling error on Login ('log|n') | 14
- 79 | Minor | System slow when running too many reports | 47
- '''
- integer = Word(nums)
- SEP = Suppress('|')
- # use SkipTo to simply match everything up until the next SEP
- # - ignore quoted strings, so that a '|' character inside a quoted string does not match
- # - parse action will call token.strip() for each matched token, i.e., the description body
- string_data = SkipTo(SEP, ignore=quotedString)
- string_data.setParseAction(tokenMap(str.strip))
- ticket_expr = (integer("issue_num") + SEP
- + string_data("sev") + SEP
- + string_data("desc") + SEP
- + integer("days_open"))
-
- for tkt in ticket_expr.searchString(report):
- print tkt.dump()
- prints::
- ['101', 'Critical', 'Intermittent system crash', '6']
- - days_open: 6
- - desc: Intermittent system crash
- - issue_num: 101
- - sev: Critical
- ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
- - days_open: 14
- - desc: Spelling error on Login ('log|n')
- - issue_num: 94
- - sev: Cosmetic
- ['79', 'Minor', 'System slow when running too many reports', '47']
- - days_open: 47
- - desc: System slow when running too many reports
- - issue_num: 79
- - sev: Minor
- """
- def __init__( self, other, include=False, ignore=None, failOn=None ):
- super( SkipTo, self ).__init__( other )
- self.ignoreExpr = ignore
- self.mayReturnEmpty = True
- self.mayIndexError = False
- self.includeMatch = include
- self.asList = False
- if isinstance(failOn, basestring):
- self.failOn = ParserElement._literalStringClass(failOn)
- else:
- self.failOn = failOn
- self.errmsg = "No match found for "+_ustr(self.expr)
-
- def parseImpl( self, instring, loc, doActions=True ):
- startloc = loc
- instrlen = len(instring)
- expr = self.expr
- expr_parse = self.expr._parse
- self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
- self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
-
- tmploc = loc
- while tmploc <= instrlen:
- if self_failOn_canParseNext is not None:
- # break if failOn expression matches
- if self_failOn_canParseNext(instring, tmploc):
- break
-
- if self_ignoreExpr_tryParse is not None:
- # advance past ignore expressions
- while 1:
- try:
- tmploc = self_ignoreExpr_tryParse(instring, tmploc)
- except ParseBaseException:
- break
-
- try:
- expr_parse(instring, tmploc, doActions=False, callPreParse=False)
- except (ParseException, IndexError):
- # no match, advance loc in string
- tmploc += 1
- else:
- # matched skipto expr, done
- break
-
- else:
- # ran off the end of the input string without matching skipto expr, fail
- raise ParseException(instring, loc, self.errmsg, self)
-
- # build up return values
- loc = tmploc
- skiptext = instring[startloc:loc]
- skipresult = ParseResults(skiptext)
-
- if self.includeMatch:
- loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
- skipresult += mat
-
- return loc, skipresult
-
-class Forward(ParseElementEnhance):
- """
- Forward declaration of an expression to be defined later -
- used for recursive grammars, such as algebraic infix notation.
- When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
-
- Note: take care when assigning to C{Forward} not to overlook precedence of operators.
- Specifically, '|' has a lower precedence than '<<', so that::
- fwdExpr << a | b | c
- will actually be evaluated as::
- (fwdExpr << a) | b | c
- thereby leaving b and c out as parseable alternatives. It is recommended that you
- explicitly group the values inserted into the C{Forward}::
- fwdExpr << (a | b | c)
- Converting to use the '<<=' operator instead will avoid this problem.
-
- See L{ParseResults.pprint} for an example of a recursive parser created using
- C{Forward}.
- """
- def __init__( self, other=None ):
- super(Forward,self).__init__( other, savelist=False )
-
- def __lshift__( self, other ):
- if isinstance( other, basestring ):
- other = ParserElement._literalStringClass(other)
- self.expr = other
- self.strRepr = None
- self.mayIndexError = self.expr.mayIndexError
- self.mayReturnEmpty = self.expr.mayReturnEmpty
- self.setWhitespaceChars( self.expr.whiteChars )
- self.skipWhitespace = self.expr.skipWhitespace
- self.saveAsList = self.expr.saveAsList
- self.ignoreExprs.extend(self.expr.ignoreExprs)
- return self
-
- def __ilshift__(self, other):
- return self << other
-
- def leaveWhitespace( self ):
- self.skipWhitespace = False
- return self
-
- def streamline( self ):
- if not self.streamlined:
- self.streamlined = True
- if self.expr is not None:
- self.expr.streamline()
- return self
-
- def validate( self, validateTrace=[] ):
- if self not in validateTrace:
- tmp = validateTrace[:]+[self]
- if self.expr is not None:
- self.expr.validate(tmp)
- self.checkRecursion([])
-
- def __str__( self ):
- if hasattr(self,"name"):
- return self.name
- return self.__class__.__name__ + ": ..."
-
- # stubbed out for now - creates awful memory and perf issues
- self._revertClass = self.__class__
- self.__class__ = _ForwardNoRecurse
- try:
- if self.expr is not None:
- retString = _ustr(self.expr)
- else:
- retString = "None"
- finally:
- self.__class__ = self._revertClass
- return self.__class__.__name__ + ": " + retString
-
- def copy(self):
- if self.expr is not None:
- return super(Forward,self).copy()
- else:
- ret = Forward()
- ret <<= self
- return ret
-
-class _ForwardNoRecurse(Forward):
- def __str__( self ):
- return "..."
-
-class TokenConverter(ParseElementEnhance):
- """
- Abstract subclass of C{ParseExpression}, for converting parsed results.
- """
- def __init__( self, expr, savelist=False ):
- super(TokenConverter,self).__init__( expr )#, savelist )
- self.saveAsList = False
-
-class Combine(TokenConverter):
- """
- Converter to concatenate all matching tokens to a single string.
- By default, the matching patterns must also be contiguous in the input string;
- this can be disabled by specifying C{'adjacent=False'} in the constructor.
-
- Example::
- real = Word(nums) + '.' + Word(nums)
- print(real.parseString('3.1416')) # -> ['3', '.', '1416']
- # will also erroneously match the following
- print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
-
- real = Combine(Word(nums) + '.' + Word(nums))
- print(real.parseString('3.1416')) # -> ['3.1416']
- # no match when there are internal spaces
- print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
- """
- def __init__( self, expr, joinString="", adjacent=True ):
- super(Combine,self).__init__( expr )
- # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
- if adjacent:
- self.leaveWhitespace()
- self.adjacent = adjacent
- self.skipWhitespace = True
- self.joinString = joinString
- self.callPreparse = True
-
- def ignore( self, other ):
- if self.adjacent:
- ParserElement.ignore(self, other)
- else:
- super( Combine, self).ignore( other )
- return self
-
- def postParse( self, instring, loc, tokenlist ):
- retToks = tokenlist.copy()
- del retToks[:]
- retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
-
- if self.resultsName and retToks.haskeys():
- return [ retToks ]
- else:
- return retToks
-
-class Group(TokenConverter):
- """
- Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
-
- Example::
- ident = Word(alphas)
- num = Word(nums)
- term = ident | num
- func = ident + Optional(delimitedList(term))
- print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
-
- func = ident + Group(Optional(delimitedList(term)))
- print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
- """
- def __init__( self, expr ):
- super(Group,self).__init__( expr )
- self.saveAsList = True
-
- def postParse( self, instring, loc, tokenlist ):
- return [ tokenlist ]
-
-class Dict(TokenConverter):
- """
- Converter to return a repetitive expression as a list, but also as a dictionary.
- Each element can also be referenced using the first token in the expression as its key.
- Useful for tabular report scraping when the first column can be used as a item key.
-
- Example::
- data_word = Word(alphas)
- label = data_word + FollowedBy(':')
- attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
-
- text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
- attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
-
- # print attributes as plain groups
- print(OneOrMore(attr_expr).parseString(text).dump())
-
- # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
- result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
- print(result.dump())
-
- # access named fields as dict entries, or output as dict
- print(result['shape'])
- print(result.asDict())
- prints::
- ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
-
- [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
- - color: light blue
- - posn: upper left
- - shape: SQUARE
- - texture: burlap
- SQUARE
- {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
- See more examples at L{ParseResults} of accessing fields by results name.
- """
- def __init__( self, expr ):
- super(Dict,self).__init__( expr )
- self.saveAsList = True
-
- def postParse( self, instring, loc, tokenlist ):
- for i,tok in enumerate(tokenlist):
- if len(tok) == 0:
- continue
- ikey = tok[0]
- if isinstance(ikey,int):
- ikey = _ustr(tok[0]).strip()
- if len(tok)==1:
- tokenlist[ikey] = _ParseResultsWithOffset("",i)
- elif len(tok)==2 and not isinstance(tok[1],ParseResults):
- tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
- else:
- dictvalue = tok.copy() #ParseResults(i)
- del dictvalue[0]
- if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
- tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
- else:
- tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
-
- if self.resultsName:
- return [ tokenlist ]
- else:
- return tokenlist
-
-
-class Suppress(TokenConverter):
- """
- Converter for ignoring the results of a parsed expression.
-
- Example::
- source = "a, b, c,d"
- wd = Word(alphas)
- wd_list1 = wd + ZeroOrMore(',' + wd)
- print(wd_list1.parseString(source))
-
- # often, delimiters that are useful during parsing are just in the
- # way afterward - use Suppress to keep them out of the parsed output
- wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
- print(wd_list2.parseString(source))
- prints::
- ['a', ',', 'b', ',', 'c', ',', 'd']
- ['a', 'b', 'c', 'd']
- (See also L{delimitedList}.)
- """
- def postParse( self, instring, loc, tokenlist ):
- return []
-
- def suppress( self ):
- return self
-
-
-class OnlyOnce(object):
- """
- Wrapper for parse actions, to ensure they are only called once.
- """
- def __init__(self, methodCall):
- self.callable = _trim_arity(methodCall)
- self.called = False
- def __call__(self,s,l,t):
- if not self.called:
- results = self.callable(s,l,t)
- self.called = True
- return results
- raise ParseException(s,l,"")
- def reset(self):
- self.called = False
-
-def traceParseAction(f):
- """
- Decorator for debugging parse actions.
-
- When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
- When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
-
- Example::
- wd = Word(alphas)
-
- @traceParseAction
- def remove_duplicate_chars(tokens):
- return ''.join(sorted(set(''.join(tokens))))
-
- wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
- print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
- prints::
- >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
- <<leaving remove_duplicate_chars (ret: 'dfjkls')
- ['dfjkls']
- """
- f = _trim_arity(f)
- def z(*paArgs):
- thisFunc = f.__name__
- s,l,t = paArgs[-3:]
- if len(paArgs)>3:
- thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
- sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
- try:
- ret = f(*paArgs)
- except Exception as exc:
- sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
- raise
- sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
- return ret
- try:
- z.__name__ = f.__name__
- except AttributeError:
- pass
- return z
-
-#
-# global helpers
-#
-def delimitedList( expr, delim=",", combine=False ):
- """
- Helper to define a delimited list of expressions - the delimiter defaults to ','.
- By default, the list elements and delimiters can have intervening whitespace, and
- comments, but this can be overridden by passing C{combine=True} in the constructor.
- If C{combine} is set to C{True}, the matching tokens are returned as a single token
- string, with the delimiters included; otherwise, the matching tokens are returned
- as a list of tokens, with the delimiters suppressed.
-
- Example::
- delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
- delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
- """
- dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
- if combine:
- return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
- else:
- return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
-
-def countedArray( expr, intExpr=None ):
- """
- Helper to define a counted list of expressions.
- This helper defines a pattern of the form::
- integer expr expr expr...
- where the leading integer tells how many expr expressions follow.
- The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
-
- If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
-
- Example::
- countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
-
- # in this parser, the leading integer value is given in binary,
- # '10' indicating that 2 values are in the array
- binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
- countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
- """
- arrayExpr = Forward()
- def countFieldParseAction(s,l,t):
- n = t[0]
- arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
- return []
- if intExpr is None:
- intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
- else:
- intExpr = intExpr.copy()
- intExpr.setName("arrayLen")
- intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
- return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
-
-def _flatten(L):
- ret = []
- for i in L:
- if isinstance(i,list):
- ret.extend(_flatten(i))
- else:
- ret.append(i)
- return ret
-
-def matchPreviousLiteral(expr):
- """
- Helper to define an expression that is indirectly defined from
- the tokens matched in a previous expression, that is, it looks
- for a 'repeat' of a previous expression. For example::
- first = Word(nums)
- second = matchPreviousLiteral(first)
- matchExpr = first + ":" + second
- will match C{"1:1"}, but not C{"1:2"}. Because this matches a
- previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
- If this is not desired, use C{matchPreviousExpr}.
- Do I{not} use with packrat parsing enabled.
- """
- rep = Forward()
- def copyTokenToRepeater(s,l,t):
- if t:
- if len(t) == 1:
- rep << t[0]
- else:
- # flatten t tokens
- tflat = _flatten(t.asList())
- rep << And(Literal(tt) for tt in tflat)
- else:
- rep << Empty()
- expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
- rep.setName('(prev) ' + _ustr(expr))
- return rep
-
-def matchPreviousExpr(expr):
- """
- Helper to define an expression that is indirectly defined from
- the tokens matched in a previous expression, that is, it looks
- for a 'repeat' of a previous expression. For example::
- first = Word(nums)
- second = matchPreviousExpr(first)
- matchExpr = first + ":" + second
- will match C{"1:1"}, but not C{"1:2"}. Because this matches by
- expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
- the expressions are evaluated first, and then compared, so
- C{"1"} is compared with C{"10"}.
- Do I{not} use with packrat parsing enabled.
- """
- rep = Forward()
- e2 = expr.copy()
- rep <<= e2
- def copyTokenToRepeater(s,l,t):
- matchTokens = _flatten(t.asList())
- def mustMatchTheseTokens(s,l,t):
- theseTokens = _flatten(t.asList())
- if theseTokens != matchTokens:
- raise ParseException("",0,"")
- rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
- expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
- rep.setName('(prev) ' + _ustr(expr))
- return rep
-
-def _escapeRegexRangeChars(s):
- #~ escape these chars: ^-]
- for c in r"\^-]":
- s = s.replace(c,_bslash+c)
- s = s.replace("\n",r"\n")
- s = s.replace("\t",r"\t")
- return _ustr(s)
-
-def oneOf( strs, caseless=False, useRegex=True ):
- """
- Helper to quickly define a set of alternative Literals, and makes sure to do
- longest-first testing when there is a conflict, regardless of the input order,
- but returns a C{L{MatchFirst}} for best performance.
-
- Parameters:
- - strs - a string of space-delimited literals, or a collection of string literals
- - caseless - (default=C{False}) - treat all literals as caseless
- - useRegex - (default=C{True}) - as an optimization, will generate a Regex
- object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
- if creating a C{Regex} raises an exception)
-
- Example::
- comp_oper = oneOf("< = > <= >= !=")
- var = Word(alphas)
- number = Word(nums)
- term = var | number
- comparison_expr = term + comp_oper + term
- print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))
- prints::
- [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
- """
- if caseless:
- isequal = ( lambda a,b: a.upper() == b.upper() )
- masks = ( lambda a,b: b.upper().startswith(a.upper()) )
- parseElementClass = CaselessLiteral
- else:
- isequal = ( lambda a,b: a == b )
- masks = ( lambda a,b: b.startswith(a) )
- parseElementClass = Literal
-
- symbols = []
- if isinstance(strs,basestring):
- symbols = strs.split()
- elif isinstance(strs, Iterable):
- symbols = list(strs)
- else:
- warnings.warn("Invalid argument to oneOf, expected string or iterable",
- SyntaxWarning, stacklevel=2)
- if not symbols:
- return NoMatch()
-
- i = 0
- while i < len(symbols)-1:
- cur = symbols[i]
- for j,other in enumerate(symbols[i+1:]):
- if ( isequal(other, cur) ):
- del symbols[i+j+1]
- break
- elif ( masks(cur, other) ):
- del symbols[i+j+1]
- symbols.insert(i,other)
- cur = other
- break
- else:
- i += 1
-
- if not caseless and useRegex:
- #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
- try:
- if len(symbols)==len("".join(symbols)):
- return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
- else:
- return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
- except Exception:
- warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
- SyntaxWarning, stacklevel=2)
-
-
- # last resort, just use MatchFirst
- return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
-
-def dictOf( key, value ):
- """
- Helper to easily and clearly define a dictionary by specifying the respective patterns
- for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
- in the proper order. The key pattern can include delimiting markers or punctuation,
- as long as they are suppressed, thereby leaving the significant key text. The value
- pattern can include named results, so that the C{Dict} results can include named token
- fields.
-
- Example::
- text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
- attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
- print(OneOrMore(attr_expr).parseString(text).dump())
-
- attr_label = label
- attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
-
- # similar to Dict, but simpler call format
- result = dictOf(attr_label, attr_value).parseString(text)
- print(result.dump())
- print(result['shape'])
- print(result.shape) # object attribute access works too
- print(result.asDict())
- prints::
- [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
- - color: light blue
- - posn: upper left
- - shape: SQUARE
- - texture: burlap
- SQUARE
- SQUARE
- {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
- """
- return Dict( ZeroOrMore( Group ( key + value ) ) )
-
-def originalTextFor(expr, asString=True):
- """
- Helper to return the original, untokenized text for a given expression. Useful to
- restore the parsed fields of an HTML start tag into the raw tag text itself, or to
- revert separate tokens with intervening whitespace back to the original matching
- input text. By default, returns astring containing the original parsed text.
-
- If the optional C{asString} argument is passed as C{False}, then the return value is a
- C{L{ParseResults}} containing any results names that were originally matched, and a
- single token containing the original matched text from the input string. So if
- the expression passed to C{L{originalTextFor}} contains expressions with defined
- results names, you must set C{asString} to C{False} if you want to preserve those
- results name values.
-
- Example::
- src = "this is test <b> bold <i>text</i> </b> normal text "
- for tag in ("b","i"):
- opener,closer = makeHTMLTags(tag)
- patt = originalTextFor(opener + SkipTo(closer) + closer)
- print(patt.searchString(src)[0])
- prints::
- ['<b> bold <i>text</i> </b>']
- ['<i>text</i>']
- """
- locMarker = Empty().setParseAction(lambda s,loc,t: loc)
- endlocMarker = locMarker.copy()
- endlocMarker.callPreparse = False
- matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
- if asString:
- extractText = lambda s,l,t: s[t._original_start:t._original_end]
- else:
- def extractText(s,l,t):
- t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
- matchExpr.setParseAction(extractText)
- matchExpr.ignoreExprs = expr.ignoreExprs
- return matchExpr
-
-def ungroup(expr):
- """
- Helper to undo pyparsing's default grouping of And expressions, even
- if all but one are non-empty.
- """
- return TokenConverter(expr).setParseAction(lambda t:t[0])
-
-def locatedExpr(expr):
- """
- Helper to decorate a returned token with its starting and ending locations in the input string.
- This helper adds the following results names:
- - locn_start = location where matched expression begins
- - locn_end = location where matched expression ends
- - value = the actual parsed results
-
- Be careful if the input text contains C{<TAB>} characters, you may want to call
- C{L{ParserElement.parseWithTabs}}
-
- Example::
- wd = Word(alphas)
- for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
- print(match)
- prints::
- [[0, 'ljsdf', 5]]
- [[8, 'lksdjjf', 15]]
- [[18, 'lkkjj', 23]]
- """
- locator = Empty().setParseAction(lambda s,l,t: l)
- return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
-
-
-# convenience constants for positional expressions
-empty = Empty().setName("empty")
-lineStart = LineStart().setName("lineStart")
-lineEnd = LineEnd().setName("lineEnd")
-stringStart = StringStart().setName("stringStart")
-stringEnd = StringEnd().setName("stringEnd")
-
-_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
-_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
-_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
-_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
-_charRange = Group(_singleChar + Suppress("-") + _singleChar)
-_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
-
-def srange(s):
- r"""
- Helper to easily define string ranges for use in Word construction. Borrows
- syntax from regexp '[]' string range definitions::
- srange("[0-9]") -> "0123456789"
- srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
- srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
- The input string must be enclosed in []'s, and the returned string is the expanded
- character set joined into a single string.
- The values enclosed in the []'s may be:
- - a single character
- - an escaped character with a leading backslash (such as C{\-} or C{\]})
- - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
- (C{\0x##} is also supported for backwards compatibility)
- - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
- - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
- - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
- """
- _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
- try:
- return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
- except Exception:
- return ""
-
-def matchOnlyAtCol(n):
- """
- Helper method for defining parse actions that require matching at a specific
- column in the input text.
- """
- def verifyCol(strg,locn,toks):
- if col(locn,strg) != n:
- raise ParseException(strg,locn,"matched token not at column %d" % n)
- return verifyCol
-
-def replaceWith(replStr):
- """
- Helper method for common parse actions that simply return a literal value. Especially
- useful when used with C{L{transformString<ParserElement.transformString>}()}.
-
- Example::
- num = Word(nums).setParseAction(lambda toks: int(toks[0]))
- na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
- term = na | num
-
- OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
- """
- return lambda s,l,t: [replStr]
-
-def removeQuotes(s,l,t):
- """
- Helper parse action for removing quotation marks from parsed quoted strings.
-
- Example::
- # by default, quotation marks are included in parsed results
- quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
-
- # use removeQuotes to strip quotation marks from parsed results
- quotedString.setParseAction(removeQuotes)
- quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
- """
- return t[0][1:-1]
-
-def tokenMap(func, *args):
- """
- Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional
- args are passed, they are forwarded to the given function as additional arguments after
- the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
- parsed data to an integer using base 16.
-
- Example (compare the last to example in L{ParserElement.transformString}::
- hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
- hex_ints.runTests('''
- 00 11 22 aa FF 0a 0d 1a
- ''')
-
- upperword = Word(alphas).setParseAction(tokenMap(str.upper))
- OneOrMore(upperword).runTests('''
- my kingdom for a horse
- ''')
-
- wd = Word(alphas).setParseAction(tokenMap(str.title))
- OneOrMore(wd).setParseAction(' '.join).runTests('''
- now is the winter of our discontent made glorious summer by this sun of york
- ''')
- prints::
- 00 11 22 aa FF 0a 0d 1a
- [0, 17, 34, 170, 255, 10, 13, 26]
-
- my kingdom for a horse
- ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
-
- now is the winter of our discontent made glorious summer by this sun of york
- ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
- """
- def pa(s,l,t):
- return [func(tokn, *args) for tokn in t]
-
- try:
- func_name = getattr(func, '__name__',
- getattr(func, '__class__').__name__)
- except Exception:
- func_name = str(func)
- pa.__name__ = func_name
-
- return pa
-
-upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
-"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""
-
-downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
-"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
-
-def _makeTags(tagStr, xml):
- """Internal helper to construct opening and closing tag expressions, given a tag name"""
- if isinstance(tagStr,basestring):
- resname = tagStr
- tagStr = Keyword(tagStr, caseless=not xml)
- else:
- resname = tagStr.name
-
- tagAttrName = Word(alphas,alphanums+"_-:")
- if (xml):
- tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
- openTag = Suppress("<") + tagStr("tag") + \
- Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
- Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
- else:
- printablesLessRAbrack = "".join(c for c in printables if c not in ">")
- tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
- openTag = Suppress("<") + tagStr("tag") + \
- Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
- Optional( Suppress("=") + tagAttrValue ) ))) + \
- Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
- closeTag = Combine(_L("</") + tagStr + ">")
-
- openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
- closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
- openTag.tag = resname
- closeTag.tag = resname
- return openTag, closeTag
-
-def makeHTMLTags(tagStr):
- """
- Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
- tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
-
- Example::
- text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
- # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
- a,a_end = makeHTMLTags("A")
- link_expr = a + SkipTo(a_end)("link_text") + a_end
-
- for link in link_expr.searchString(text):
- # attributes in the <A> tag (like "href" shown here) are also accessible as named results
- print(link.link_text, '->', link.href)
- prints::
- pyparsing -> http://pyparsing.wikispaces.com
- """
- return _makeTags( tagStr, False )
-
-def makeXMLTags(tagStr):
- """
- Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
- tags only in the given upper/lower case.
-
- Example: similar to L{makeHTMLTags}
- """
- return _makeTags( tagStr, True )
-
-def withAttribute(*args,**attrDict):
- """
- Helper to create a validating parse action to be used with start tags created
- with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
- with a required attribute value, to avoid false matches on common tags such as
- C{<TD>} or C{<DIV>}.
-
- Call C{withAttribute} with a series of attribute names and values. Specify the list
- of filter attributes names and values as:
- - keyword arguments, as in C{(align="right")}, or
- - as an explicit dict with C{**} operator, when an attribute name is also a Python
- reserved word, as in C{**{"class":"Customer", "align":"right"}}
- - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
- For attribute names with a namespace prefix, you must use the second form. Attribute
- names are matched insensitive to upper/lower case.
-
- If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
-
- To verify that the attribute exists, but without specifying a value, pass
- C{withAttribute.ANY_VALUE} as the value.
-
- Example::
- html = '''
- <div>
- Some text
- <div type="grid">1 4 0 1 0</div>
- <div type="graph">1,3 2,3 1,1</div>
- <div>this has no type</div>
- </div>
-
- '''
- div,div_end = makeHTMLTags("div")
-
- # only match div tag having a type attribute with value "grid"
- div_grid = div().setParseAction(withAttribute(type="grid"))
- grid_expr = div_grid + SkipTo(div | div_end)("body")
- for grid_header in grid_expr.searchString(html):
- print(grid_header.body)
-
- # construct a match with any div tag having a type attribute, regardless of the value
- div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
- div_expr = div_any_type + SkipTo(div | div_end)("body")
- for div_header in div_expr.searchString(html):
- print(div_header.body)
- prints::
- 1 4 0 1 0
-
- 1 4 0 1 0
- 1,3 2,3 1,1
- """
- if args:
- attrs = args[:]
- else:
- attrs = attrDict.items()
- attrs = [(k,v) for k,v in attrs]
- def pa(s,l,tokens):
- for attrName,attrValue in attrs:
- if attrName not in tokens:
- raise ParseException(s,l,"no matching attribute " + attrName)
- if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
- raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
- (attrName, tokens[attrName], attrValue))
- return pa
-withAttribute.ANY_VALUE = object()
-
-def withClass(classname, namespace=''):
- """
- Simplified version of C{L{withAttribute}} when matching on a div class - made
- difficult because C{class} is a reserved word in Python.
-
- Example::
- html = '''
- <div>
- Some text
- <div class="grid">1 4 0 1 0</div>
- <div class="graph">1,3 2,3 1,1</div>
- <div>this <div> has no class</div>
- </div>
-
- '''
- div,div_end = makeHTMLTags("div")
- div_grid = div().setParseAction(withClass("grid"))
-
- grid_expr = div_grid + SkipTo(div | div_end)("body")
- for grid_header in grid_expr.searchString(html):
- print(grid_header.body)
-
- div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
- div_expr = div_any_type + SkipTo(div | div_end)("body")
- for div_header in div_expr.searchString(html):
- print(div_header.body)
- prints::
- 1 4 0 1 0
-
- 1 4 0 1 0
- 1,3 2,3 1,1
- """
- classattr = "%s:class" % namespace if namespace else "class"
- return withAttribute(**{classattr : classname})
-
-opAssoc = _Constants()
-opAssoc.LEFT = object()
-opAssoc.RIGHT = object()
-
-def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
- """
- Helper method for constructing grammars of expressions made up of
- operators working in a precedence hierarchy. Operators may be unary or
- binary, left- or right-associative. Parse actions can also be attached
- to operator expressions. The generated parser will also recognize the use
- of parentheses to override operator precedences (see example below).
-
- Note: if you define a deep operator list, you may see performance issues
- when using infixNotation. See L{ParserElement.enablePackrat} for a
- mechanism to potentially improve your parser performance.
-
- Parameters:
- - baseExpr - expression representing the most basic element for the nested
- - opList - list of tuples, one for each operator precedence level in the
- expression grammar; each tuple is of the form
- (opExpr, numTerms, rightLeftAssoc, parseAction), where:
- - opExpr is the pyparsing expression for the operator;
- may also be a string, which will be converted to a Literal;
- if numTerms is 3, opExpr is a tuple of two expressions, for the
- two operators separating the 3 terms
- - numTerms is the number of terms for this operator (must
- be 1, 2, or 3)
- - rightLeftAssoc is the indicator whether the operator is
- right or left associative, using the pyparsing-defined
- constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
- - parseAction is the parse action to be associated with
- expressions matching this operator expression (the
- parse action tuple member may be omitted); if the parse action
- is passed a tuple or list of functions, this is equivalent to
- calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
- - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
- - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
-
- Example::
- # simple example of four-function arithmetic with ints and variable names
- integer = pyparsing_common.signed_integer
- varname = pyparsing_common.identifier
-
- arith_expr = infixNotation(integer | varname,
- [
- ('-', 1, opAssoc.RIGHT),
- (oneOf('* /'), 2, opAssoc.LEFT),
- (oneOf('+ -'), 2, opAssoc.LEFT),
- ])
-
- arith_expr.runTests('''
- 5+3*6
- (5+3)*6
- -2--11
- ''', fullDump=False)
- prints::
- 5+3*6
- [[5, '+', [3, '*', 6]]]
-
- (5+3)*6
- [[[5, '+', 3], '*', 6]]
-
- -2--11
- [[['-', 2], '-', ['-', 11]]]
- """
- ret = Forward()
- lastExpr = baseExpr | ( lpar + ret + rpar )
- for i,operDef in enumerate(opList):
- opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
- termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
- if arity == 3:
- if opExpr is None or len(opExpr) != 2:
- raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
- opExpr1, opExpr2 = opExpr
- thisExpr = Forward().setName(termName)
- if rightLeftAssoc == opAssoc.LEFT:
- if arity == 1:
- matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
- elif arity == 2:
- if opExpr is not None:
- matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
- else:
- matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
- elif arity == 3:
- matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
- Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
- else:
- raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
- elif rightLeftAssoc == opAssoc.RIGHT:
- if arity == 1:
- # try to avoid LR with this extra test
- if not isinstance(opExpr, Optional):
- opExpr = Optional(opExpr)
- matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
- elif arity == 2:
- if opExpr is not None:
- matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
- else:
- matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
- elif arity == 3:
- matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
- Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
- else:
- raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
- else:
- raise ValueError("operator must indicate right or left associativity")
- if pa:
- if isinstance(pa, (tuple, list)):
- matchExpr.setParseAction(*pa)
- else:
- matchExpr.setParseAction(pa)
- thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
- lastExpr = thisExpr
- ret <<= lastExpr
- return ret
-
-operatorPrecedence = infixNotation
-"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
-
-dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
-sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
-quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
- Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
-unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
-
-def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
- """
- Helper method for defining nested lists enclosed in opening and closing
- delimiters ("(" and ")" are the default).
-
- Parameters:
- - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
- - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
- - content - expression for items within the nested lists (default=C{None})
- - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
-
- If an expression is not provided for the content argument, the nested
- expression will capture all whitespace-delimited content between delimiters
- as a list of separate values.
-
- Use the C{ignoreExpr} argument to define expressions that may contain
- opening or closing characters that should not be treated as opening
- or closing characters for nesting, such as quotedString or a comment
- expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
- The default is L{quotedString}, but if no expressions are to be ignored,
- then pass C{None} for this argument.
-
- Example::
- data_type = oneOf("void int short long char float double")
- decl_data_type = Combine(data_type + Optional(Word('*')))
- ident = Word(alphas+'_', alphanums+'_')
- number = pyparsing_common.number
- arg = Group(decl_data_type + ident)
- LPAR,RPAR = map(Suppress, "()")
-
- code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
-
- c_function = (decl_data_type("type")
- + ident("name")
- + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
- + code_body("body"))
- c_function.ignore(cStyleComment)
-
- source_code = '''
- int is_odd(int x) {
- return (x%2);
- }
-
- int dec_to_hex(char hchar) {
- if (hchar >= '0' && hchar <= '9') {
- return (ord(hchar)-ord('0'));
- } else {
- return (10+ord(hchar)-ord('A'));
- }
- }
- '''
- for func in c_function.searchString(source_code):
- print("%(name)s (%(type)s) args: %(args)s" % func)
-
- prints::
- is_odd (int) args: [['int', 'x']]
- dec_to_hex (int) args: [['char', 'hchar']]
- """
- if opener == closer:
- raise ValueError("opening and closing strings cannot be the same")
- if content is None:
- if isinstance(opener,basestring) and isinstance(closer,basestring):
- if len(opener) == 1 and len(closer)==1:
- if ignoreExpr is not None:
- content = (Combine(OneOrMore(~ignoreExpr +
- CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
- ).setParseAction(lambda t:t[0].strip()))
- else:
- content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
- ).setParseAction(lambda t:t[0].strip()))
- else:
- if ignoreExpr is not None:
- content = (Combine(OneOrMore(~ignoreExpr +
- ~Literal(opener) + ~Literal(closer) +
- CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
- ).setParseAction(lambda t:t[0].strip()))
- else:
- content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
- CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
- ).setParseAction(lambda t:t[0].strip()))
- else:
- raise ValueError("opening and closing arguments must be strings if no content expression is given")
- ret = Forward()
- if ignoreExpr is not None:
- ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
- else:
- ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
- ret.setName('nested %s%s expression' % (opener,closer))
- return ret
-
-def indentedBlock(blockStatementExpr, indentStack, indent=True):
- """
- Helper method for defining space-delimited indentation blocks, such as
- those used to define block statements in Python source code.
-
- Parameters:
- - blockStatementExpr - expression defining syntax of statement that
- is repeated within the indented block
- - indentStack - list created by caller to manage indentation stack
- (multiple statementWithIndentedBlock expressions within a single grammar
- should share a common indentStack)
- - indent - boolean indicating whether block must be indented beyond the
- the current level; set to False for block of left-most statements
- (default=C{True})
-
- A valid block must contain at least one C{blockStatement}.
-
- Example::
- data = '''
- def A(z):
- A1
- B = 100
- G = A2
- A2
- A3
- B
- def BB(a,b,c):
- BB1
- def BBA():
- bba1
- bba2
- bba3
- C
- D
- def spam(x,y):
- def eggs(z):
- pass
- '''
-
-
- indentStack = [1]
- stmt = Forward()
-
- identifier = Word(alphas, alphanums)
- funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
- func_body = indentedBlock(stmt, indentStack)
- funcDef = Group( funcDecl + func_body )
-
- rvalue = Forward()
- funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
- rvalue << (funcCall | identifier | Word(nums))
- assignment = Group(identifier + "=" + rvalue)
- stmt << ( funcDef | assignment | identifier )
-
- module_body = OneOrMore(stmt)
-
- parseTree = module_body.parseString(data)
- parseTree.pprint()
- prints::
- [['def',
- 'A',
- ['(', 'z', ')'],
- ':',
- [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
- 'B',
- ['def',
- 'BB',
- ['(', 'a', 'b', 'c', ')'],
- ':',
- [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
- 'C',
- 'D',
- ['def',
- 'spam',
- ['(', 'x', 'y', ')'],
- ':',
- [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
- """
- def checkPeerIndent(s,l,t):
- if l >= len(s): return
- curCol = col(l,s)
- if curCol != indentStack[-1]:
- if curCol > indentStack[-1]:
- raise ParseFatalException(s,l,"illegal nesting")
- raise ParseException(s,l,"not a peer entry")
-
- def checkSubIndent(s,l,t):
- curCol = col(l,s)
- if curCol > indentStack[-1]:
- indentStack.append( curCol )
- else:
- raise ParseException(s,l,"not a subentry")
-
- def checkUnindent(s,l,t):
- if l >= len(s): return
- curCol = col(l,s)
- if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
- raise ParseException(s,l,"not an unindent")
- indentStack.pop()
-
- NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
- INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
- PEER = Empty().setParseAction(checkPeerIndent).setName('')
- UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
- if indent:
- smExpr = Group( Optional(NL) +
- #~ FollowedBy(blockStatementExpr) +
- INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
- else:
- smExpr = Group( Optional(NL) +
- (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
- blockStatementExpr.ignore(_bslash + LineEnd())
- return smExpr.setName('indented block')
-
-alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
-punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
-
-anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
-_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
-commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
-def replaceHTMLEntity(t):
- """Helper parser action to replace common HTML entities with their special characters"""
- return _htmlEntityMap.get(t.entity)
-
-# it's easy to get these comment structures wrong - they're very common, so may as well make them available
-cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
-"Comment of the form C{/* ... */}"
-
-htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
-"Comment of the form C{<!-- ... -->}"
-
-restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
-dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
-"Comment of the form C{// ... (to end of line)}"
-
-cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
-"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
-
-javaStyleComment = cppStyleComment
-"Same as C{L{cppStyleComment}}"
-
-pythonStyleComment = Regex(r"#.*").setName("Python style comment")
-"Comment of the form C{# ... (to end of line)}"
-
-_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
- Optional( Word(" \t") +
- ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
-commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
-"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
- This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
-
-# some other useful expressions - using lower-case class name since we are really using this as a namespace
-class pyparsing_common:
- """
- Here are some common low-level expressions that may be useful in jump-starting parser development:
- - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
- - common L{programming identifiers<identifier>}
- - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
- - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
- - L{UUID<uuid>}
- - L{comma-separated list<comma_separated_list>}
- Parse actions:
- - C{L{convertToInteger}}
- - C{L{convertToFloat}}
- - C{L{convertToDate}}
- - C{L{convertToDatetime}}
- - C{L{stripHTMLTags}}
- - C{L{upcaseTokens}}
- - C{L{downcaseTokens}}
-
- Example::
- pyparsing_common.number.runTests('''
- # any int or real number, returned as the appropriate type
- 100
- -100
- +100
- 3.14159
- 6.02e23
- 1e-12
- ''')
-
- pyparsing_common.fnumber.runTests('''
- # any int or real number, returned as float
- 100
- -100
- +100
- 3.14159
- 6.02e23
- 1e-12
- ''')
-
- pyparsing_common.hex_integer.runTests('''
- # hex numbers
- 100
- FF
- ''')
-
- pyparsing_common.fraction.runTests('''
- # fractions
- 1/2
- -3/4
- ''')
-
- pyparsing_common.mixed_integer.runTests('''
- # mixed fractions
- 1
- 1/2
- -3/4
- 1-3/4
- ''')
-
- import uuid
- pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
- pyparsing_common.uuid.runTests('''
- # uuid
- 12345678-1234-5678-1234-567812345678
- ''')
- prints::
- # any int or real number, returned as the appropriate type
- 100
- [100]
-
- -100
- [-100]
-
- +100
- [100]
-
- 3.14159
- [3.14159]
-
- 6.02e23
- [6.02e+23]
-
- 1e-12
- [1e-12]
-
- # any int or real number, returned as float
- 100
- [100.0]
-
- -100
- [-100.0]
-
- +100
- [100.0]
-
- 3.14159
- [3.14159]
-
- 6.02e23
- [6.02e+23]
-
- 1e-12
- [1e-12]
-
- # hex numbers
- 100
- [256]
-
- FF
- [255]
-
- # fractions
- 1/2
- [0.5]
-
- -3/4
- [-0.75]
-
- # mixed fractions
- 1
- [1]
-
- 1/2
- [0.5]
-
- -3/4
- [-0.75]
-
- 1-3/4
- [1.75]
-
- # uuid
- 12345678-1234-5678-1234-567812345678
- [UUID('12345678-1234-5678-1234-567812345678')]
- """
-
- convertToInteger = tokenMap(int)
- """
- Parse action for converting parsed integers to Python int
- """
-
- convertToFloat = tokenMap(float)
- """
- Parse action for converting parsed numbers to Python float
- """
-
- integer = Word(nums).setName("integer").setParseAction(convertToInteger)
- """expression that parses an unsigned integer, returns an int"""
-
- hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
- """expression that parses a hexadecimal integer, returns an int"""
-
- signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
- """expression that parses an integer with optional leading sign, returns an int"""
-
- fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
- """fractional expression of an integer divided by an integer, returns a float"""
- fraction.addParseAction(lambda t: t[0]/t[-1])
-
- mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
- """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
- mixed_integer.addParseAction(sum)
-
- real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
- """expression that parses a floating point number and returns a float"""
-
- sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
- """expression that parses a floating point number with optional scientific notation and returns a float"""
-
- # streamlining this expression makes the docs nicer-looking
- number = (sci_real | real | signed_integer).streamline()
- """any numeric expression, returns the corresponding Python type"""
-
- fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
- """any int or real number, returned as float"""
-
- identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
- """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
-
- ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
- "IPv4 address (C{0.0.0.0 - 255.255.255.255})"
-
- _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
- _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
- _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
- _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
- _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
- ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
- "IPv6 address (long, short, or mixed form)"
-
- mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
- "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
-
- @staticmethod
- def convertToDate(fmt="%Y-%m-%d"):
- """
- Helper to create a parse action for converting parsed date string to Python datetime.date
-
- Params -
- - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
-
- Example::
- date_expr = pyparsing_common.iso8601_date.copy()
- date_expr.setParseAction(pyparsing_common.convertToDate())
- print(date_expr.parseString("1999-12-31"))
- prints::
- [datetime.date(1999, 12, 31)]
- """
- def cvt_fn(s,l,t):
- try:
- return datetime.strptime(t[0], fmt).date()
- except ValueError as ve:
- raise ParseException(s, l, str(ve))
- return cvt_fn
-
- @staticmethod
- def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
- """
- Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
-
- Params -
- - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
-
- Example::
- dt_expr = pyparsing_common.iso8601_datetime.copy()
- dt_expr.setParseAction(pyparsing_common.convertToDatetime())
- print(dt_expr.parseString("1999-12-31T23:59:59.999"))
- prints::
- [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
- """
- def cvt_fn(s,l,t):
- try:
- return datetime.strptime(t[0], fmt)
- except ValueError as ve:
- raise ParseException(s, l, str(ve))
- return cvt_fn
-
- iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
- "ISO8601 date (C{yyyy-mm-dd})"
-
- iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
- "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
-
- uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
- "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
-
- _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
- @staticmethod
- def stripHTMLTags(s, l, tokens):
- """
- Parse action to remove HTML tags from web page HTML source
-
- Example::
- # strip HTML links from normal text
- text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
- td,td_end = makeHTMLTags("TD")
- table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
-
- print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
- """
- return pyparsing_common._html_stripper.transformString(tokens[0])
-
- _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
- + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
- comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
- """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
-
- upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
- """Parse action to convert tokens to upper case."""
-
- downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
- """Parse action to convert tokens to lower case."""
-
-
-if __name__ == "__main__":
-
- selectToken = CaselessLiteral("select")
- fromToken = CaselessLiteral("from")
-
- ident = Word(alphas, alphanums + "_$")
-
- columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
- columnNameList = Group(delimitedList(columnName)).setName("columns")
- columnSpec = ('*' | columnNameList)
-
- tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
- tableNameList = Group(delimitedList(tableName)).setName("tables")
-
- simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
-
- # demo runTests method, including embedded comments in test string
- simpleSQL.runTests("""
- # '*' as column list and dotted table name
- select * from SYS.XYZZY
-
- # caseless match on "SELECT", and casts back to "select"
- SELECT * from XYZZY, ABC
-
- # list of column names, and mixed case SELECT keyword
- Select AA,BB,CC from Sys.dual
-
- # multiple tables
- Select A, B, C from Sys.dual, Table2
-
- # invalid SELECT keyword - should fail
- Xelect A, B, C from Sys.dual
-
- # incomplete command - should fail
- Select
-
- # invalid column name - should fail
- Select ^^^ frox Sys.dual
-
- """)
-
- pyparsing_common.number.runTests("""
- 100
- -100
- +100
- 3.14159
- 6.02e23
- 1e-12
- """)
-
- # any int or real number, returned as float
- pyparsing_common.fnumber.runTests("""
- 100
- -100
- +100
- 3.14159
- 6.02e23
- 1e-12
- """)
-
- pyparsing_common.hex_integer.runTests("""
- 100
- FF
- """)
-
- import uuid
- pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
- pyparsing_common.uuid.runTests("""
- 12345678-1234-5678-1234-567812345678
- """)
diff --git a/third_party/python/setuptools/pkg_resources/_vendor/typing_extensions.py b/third_party/python/setuptools/pkg_resources/_vendor/typing_extensions.py new file mode 100644 index 0000000000..ef42417c20 --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/typing_extensions.py @@ -0,0 +1,2209 @@ +import abc +import collections +import collections.abc +import functools +import operator +import sys +import types as _types +import typing + + +__all__ = [ + # Super-special typing primitives. + 'Any', + 'ClassVar', + 'Concatenate', + 'Final', + 'LiteralString', + 'ParamSpec', + 'ParamSpecArgs', + 'ParamSpecKwargs', + 'Self', + 'Type', + 'TypeVar', + 'TypeVarTuple', + 'Unpack', + + # ABCs (from collections.abc). + 'Awaitable', + 'AsyncIterator', + 'AsyncIterable', + 'Coroutine', + 'AsyncGenerator', + 'AsyncContextManager', + 'ChainMap', + + # Concrete collection types. + 'ContextManager', + 'Counter', + 'Deque', + 'DefaultDict', + 'NamedTuple', + 'OrderedDict', + 'TypedDict', + + # Structural checks, a.k.a. protocols. + 'SupportsIndex', + + # One-off things. + 'Annotated', + 'assert_never', + 'assert_type', + 'clear_overloads', + 'dataclass_transform', + 'get_overloads', + 'final', + 'get_args', + 'get_origin', + 'get_type_hints', + 'IntVar', + 'is_typeddict', + 'Literal', + 'NewType', + 'overload', + 'override', + 'Protocol', + 'reveal_type', + 'runtime', + 'runtime_checkable', + 'Text', + 'TypeAlias', + 'TypeGuard', + 'TYPE_CHECKING', + 'Never', + 'NoReturn', + 'Required', + 'NotRequired', +] + +# for backward compatibility +PEP_560 = True +GenericMeta = type + +# The functions below are modified copies of typing internal helpers. +# They are needed by _ProtocolMeta and they provide support for PEP 646. + +_marker = object() + + +def _check_generic(cls, parameters, elen=_marker): + """Check correct count for parameters of a generic cls (internal helper). + This gives a nice error message in case of count mismatch. 
+ """ + if not elen: + raise TypeError(f"{cls} is not a generic class") + if elen is _marker: + if not hasattr(cls, "__parameters__") or not cls.__parameters__: + raise TypeError(f"{cls} is not a generic class") + elen = len(cls.__parameters__) + alen = len(parameters) + if alen != elen: + if hasattr(cls, "__parameters__"): + parameters = [p for p in cls.__parameters__ if not _is_unpack(p)] + num_tv_tuples = sum(isinstance(p, TypeVarTuple) for p in parameters) + if (num_tv_tuples > 0) and (alen >= elen - num_tv_tuples): + return + raise TypeError(f"Too {'many' if alen > elen else 'few'} parameters for {cls};" + f" actual {alen}, expected {elen}") + + +if sys.version_info >= (3, 10): + def _should_collect_from_parameters(t): + return isinstance( + t, (typing._GenericAlias, _types.GenericAlias, _types.UnionType) + ) +elif sys.version_info >= (3, 9): + def _should_collect_from_parameters(t): + return isinstance(t, (typing._GenericAlias, _types.GenericAlias)) +else: + def _should_collect_from_parameters(t): + return isinstance(t, typing._GenericAlias) and not t._special + + +def _collect_type_vars(types, typevar_types=None): + """Collect all type variable contained in types in order of + first appearance (lexicographic order). For example:: + + _collect_type_vars((T, List[S, T])) == (T, S) + """ + if typevar_types is None: + typevar_types = typing.TypeVar + tvars = [] + for t in types: + if ( + isinstance(t, typevar_types) and + t not in tvars and + not _is_unpack(t) + ): + tvars.append(t) + if _should_collect_from_parameters(t): + tvars.extend([t for t in t.__parameters__ if t not in tvars]) + return tuple(tvars) + + +NoReturn = typing.NoReturn + +# Some unconstrained type variables. These are used by the container types. +# (These are not for export.) +T = typing.TypeVar('T') # Any type. +KT = typing.TypeVar('KT') # Key type. +VT = typing.TypeVar('VT') # Value type. +T_co = typing.TypeVar('T_co', covariant=True) # Any type covariant containers. 
+T_contra = typing.TypeVar('T_contra', contravariant=True) # Ditto contravariant. + + +if sys.version_info >= (3, 11): + from typing import Any +else: + + class _AnyMeta(type): + def __instancecheck__(self, obj): + if self is Any: + raise TypeError("typing_extensions.Any cannot be used with isinstance()") + return super().__instancecheck__(obj) + + def __repr__(self): + if self is Any: + return "typing_extensions.Any" + return super().__repr__() + + class Any(metaclass=_AnyMeta): + """Special type indicating an unconstrained type. + - Any is compatible with every type. + - Any assumed to have all methods. + - All values assumed to be instances of Any. + Note that all the above statements are true from the point of view of + static type checkers. At runtime, Any should not be used with instance + checks. + """ + def __new__(cls, *args, **kwargs): + if cls is Any: + raise TypeError("Any cannot be instantiated") + return super().__new__(cls, *args, **kwargs) + + +ClassVar = typing.ClassVar + +# On older versions of typing there is an internal class named "Final". +# 3.8+ +if hasattr(typing, 'Final') and sys.version_info[:2] >= (3, 7): + Final = typing.Final +# 3.7 +else: + class _FinalForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + + Final = _FinalForm('Final', + doc="""A special typing construct to indicate that a name + cannot be re-assigned or overridden in a subclass. 
+ For example: + + MAX_SIZE: Final = 9000 + MAX_SIZE += 1 # Error reported by type checker + + class Connection: + TIMEOUT: Final[int] = 10 + class FastConnector(Connection): + TIMEOUT = 1 # Error reported by type checker + + There is no runtime checking of these properties.""") + +if sys.version_info >= (3, 11): + final = typing.final +else: + # @final exists in 3.8+, but we backport it for all versions + # before 3.11 to keep support for the __final__ attribute. + # See https://bugs.python.org/issue46342 + def final(f): + """This decorator can be used to indicate to type checkers that + the decorated method cannot be overridden, and decorated class + cannot be subclassed. For example: + + class Base: + @final + def done(self) -> None: + ... + class Sub(Base): + def done(self) -> None: # Error reported by type checker + ... + @final + class Leaf: + ... + class Other(Leaf): # Error reported by type checker + ... + + There is no runtime checking of these properties. The decorator + sets the ``__final__`` attribute to ``True`` on the decorated object + to allow runtime introspection. + """ + try: + f.__final__ = True + except (AttributeError, TypeError): + # Skip the attribute silently if it is not writable. + # AttributeError happens if the object has __slots__ or a + # read-only property, TypeError if it's a builtin class. + pass + return f + + +def IntVar(name): + return typing.TypeVar(name) + + +# 3.8+: +if hasattr(typing, 'Literal'): + Literal = typing.Literal +# 3.7: +else: + class _LiteralForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + return typing._GenericAlias(self, parameters) + + Literal = _LiteralForm('Literal', + doc="""A type that can be used to indicate to type checkers + that the corresponding value has a value literally equivalent + to the provided parameter. 
For example: + + var: Literal[4] = 4 + + The type checker understands that 'var' is literally equal to + the value 4 and no other value. + + Literal[...] cannot be subclassed. There is no runtime + checking verifying that the parameter is actually a value + instead of a type.""") + + +_overload_dummy = typing._overload_dummy # noqa + + +if hasattr(typing, "get_overloads"): # 3.11+ + overload = typing.overload + get_overloads = typing.get_overloads + clear_overloads = typing.clear_overloads +else: + # {module: {qualname: {firstlineno: func}}} + _overload_registry = collections.defaultdict( + functools.partial(collections.defaultdict, dict) + ) + + def overload(func): + """Decorator for overloaded functions/methods. + + In a stub file, place two or more stub definitions for the same + function in a row, each decorated with @overload. For example: + + @overload + def utf8(value: None) -> None: ... + @overload + def utf8(value: bytes) -> bytes: ... + @overload + def utf8(value: str) -> bytes: ... + + In a non-stub file (i.e. a regular .py file), do the same but + follow it with an implementation. The implementation should *not* + be decorated with @overload. For example: + + @overload + def utf8(value: None) -> None: ... + @overload + def utf8(value: bytes) -> bytes: ... + @overload + def utf8(value: str) -> bytes: ... + def utf8(value): + # implementation goes here + + The overloads for a function can be retrieved at runtime using the + get_overloads() function. + """ + # classmethod and staticmethod + f = getattr(func, "__func__", func) + try: + _overload_registry[f.__module__][f.__qualname__][ + f.__code__.co_firstlineno + ] = func + except AttributeError: + # Not a normal function; ignore. 
+ pass + return _overload_dummy + + def get_overloads(func): + """Return all defined overloads for *func* as a sequence.""" + # classmethod and staticmethod + f = getattr(func, "__func__", func) + if f.__module__ not in _overload_registry: + return [] + mod_dict = _overload_registry[f.__module__] + if f.__qualname__ not in mod_dict: + return [] + return list(mod_dict[f.__qualname__].values()) + + def clear_overloads(): + """Clear all overloads in the registry.""" + _overload_registry.clear() + + +# This is not a real generic class. Don't use outside annotations. +Type = typing.Type + +# Various ABCs mimicking those in collections.abc. +# A few are simply re-exported for completeness. + + +Awaitable = typing.Awaitable +Coroutine = typing.Coroutine +AsyncIterable = typing.AsyncIterable +AsyncIterator = typing.AsyncIterator +Deque = typing.Deque +ContextManager = typing.ContextManager +AsyncContextManager = typing.AsyncContextManager +DefaultDict = typing.DefaultDict + +# 3.7.2+ +if hasattr(typing, 'OrderedDict'): + OrderedDict = typing.OrderedDict +# 3.7.0-3.7.2 +else: + OrderedDict = typing._alias(collections.OrderedDict, (KT, VT)) + +Counter = typing.Counter +ChainMap = typing.ChainMap +AsyncGenerator = typing.AsyncGenerator +NewType = typing.NewType +Text = typing.Text +TYPE_CHECKING = typing.TYPE_CHECKING + + +_PROTO_WHITELIST = ['Callable', 'Awaitable', + 'Iterable', 'Iterator', 'AsyncIterable', 'AsyncIterator', + 'Hashable', 'Sized', 'Container', 'Collection', 'Reversible', + 'ContextManager', 'AsyncContextManager'] + + +def _get_protocol_attrs(cls): + attrs = set() + for base in cls.__mro__[:-1]: # without object + if base.__name__ in ('Protocol', 'Generic'): + continue + annotations = getattr(base, '__annotations__', {}) + for attr in list(base.__dict__.keys()) + list(annotations.keys()): + if (not attr.startswith('_abc_') and attr not in ( + '__abstractmethods__', '__annotations__', '__weakref__', + '_is_protocol', '_is_runtime_protocol', '__dict__', + 
'__args__', '__slots__', + '__next_in_mro__', '__parameters__', '__origin__', + '__orig_bases__', '__extra__', '__tree_hash__', + '__doc__', '__subclasshook__', '__init__', '__new__', + '__module__', '_MutableMapping__marker', '_gorg')): + attrs.add(attr) + return attrs + + +def _is_callable_members_only(cls): + return all(callable(getattr(cls, attr, None)) for attr in _get_protocol_attrs(cls)) + + +def _maybe_adjust_parameters(cls): + """Helper function used in Protocol.__init_subclass__ and _TypedDictMeta.__new__. + + The contents of this function are very similar + to logic found in typing.Generic.__init_subclass__ + on the CPython main branch. + """ + tvars = [] + if '__orig_bases__' in cls.__dict__: + tvars = typing._collect_type_vars(cls.__orig_bases__) + # Look for Generic[T1, ..., Tn] or Protocol[T1, ..., Tn]. + # If found, tvars must be a subset of it. + # If not found, tvars is it. + # Also check for and reject plain Generic, + # and reject multiple Generic[...] and/or Protocol[...]. + gvars = None + for base in cls.__orig_bases__: + if (isinstance(base, typing._GenericAlias) and + base.__origin__ in (typing.Generic, Protocol)): + # for error messages + the_base = base.__origin__.__name__ + if gvars is not None: + raise TypeError( + "Cannot inherit from Generic[...]" + " and/or Protocol[...] 
multiple types.") + gvars = base.__parameters__ + if gvars is None: + gvars = tvars + else: + tvarset = set(tvars) + gvarset = set(gvars) + if not tvarset <= gvarset: + s_vars = ', '.join(str(t) for t in tvars if t not in gvarset) + s_args = ', '.join(str(g) for g in gvars) + raise TypeError(f"Some type variables ({s_vars}) are" + f" not listed in {the_base}[{s_args}]") + tvars = gvars + cls.__parameters__ = tuple(tvars) + + +# 3.8+ +if hasattr(typing, 'Protocol'): + Protocol = typing.Protocol +# 3.7 +else: + + def _no_init(self, *args, **kwargs): + if type(self)._is_protocol: + raise TypeError('Protocols cannot be instantiated') + + class _ProtocolMeta(abc.ABCMeta): # noqa: B024 + # This metaclass is a bit unfortunate and exists only because of the lack + # of __instancehook__. + def __instancecheck__(cls, instance): + # We need this method for situations where attributes are + # assigned in __init__. + if ((not getattr(cls, '_is_protocol', False) or + _is_callable_members_only(cls)) and + issubclass(instance.__class__, cls)): + return True + if cls._is_protocol: + if all(hasattr(instance, attr) and + (not callable(getattr(cls, attr, None)) or + getattr(instance, attr) is not None) + for attr in _get_protocol_attrs(cls)): + return True + return super().__instancecheck__(instance) + + class Protocol(metaclass=_ProtocolMeta): + # There is quite a lot of overlapping code with typing.Generic. + # Unfortunately it is hard to avoid this while these live in two different + # modules. The duplicated code will be removed when Protocol is moved to typing. + """Base class for protocol classes. Protocol classes are defined as:: + + class Proto(Protocol): + def meth(self) -> int: + ... 
+ + Such classes are primarily used with static type checkers that recognize + structural subtyping (static duck-typing), for example:: + + class C: + def meth(self) -> int: + return 0 + + def func(x: Proto) -> int: + return x.meth() + + func(C()) # Passes static type check + + See PEP 544 for details. Protocol classes decorated with + @typing_extensions.runtime act as simple-minded runtime protocol that checks + only the presence of given attributes, ignoring their type signatures. + + Protocol classes can be generic, they are defined as:: + + class GenProto(Protocol[T]): + def meth(self) -> T: + ... + """ + __slots__ = () + _is_protocol = True + + def __new__(cls, *args, **kwds): + if cls is Protocol: + raise TypeError("Type Protocol cannot be instantiated; " + "it can only be used as a base class") + return super().__new__(cls) + + @typing._tp_cache + def __class_getitem__(cls, params): + if not isinstance(params, tuple): + params = (params,) + if not params and cls is not typing.Tuple: + raise TypeError( + f"Parameter list to {cls.__qualname__}[...] cannot be empty") + msg = "Parameters to generic types must be types." + params = tuple(typing._type_check(p, msg) for p in params) # noqa + if cls is Protocol: + # Generic can only be subscripted with unique type variables. + if not all(isinstance(p, typing.TypeVar) for p in params): + i = 0 + while isinstance(params[i], typing.TypeVar): + i += 1 + raise TypeError( + "Parameters to Protocol[...] must all be type variables." + f" Parameter {i + 1} is {params[i]}") + if len(set(params)) != len(params): + raise TypeError( + "Parameters to Protocol[...] must all be unique") + else: + # Subscripting a regular Generic subclass. 
+ _check_generic(cls, params, len(cls.__parameters__)) + return typing._GenericAlias(cls, params) + + def __init_subclass__(cls, *args, **kwargs): + if '__orig_bases__' in cls.__dict__: + error = typing.Generic in cls.__orig_bases__ + else: + error = typing.Generic in cls.__bases__ + if error: + raise TypeError("Cannot inherit from plain Generic") + _maybe_adjust_parameters(cls) + + # Determine if this is a protocol or a concrete subclass. + if not cls.__dict__.get('_is_protocol', None): + cls._is_protocol = any(b is Protocol for b in cls.__bases__) + + # Set (or override) the protocol subclass hook. + def _proto_hook(other): + if not cls.__dict__.get('_is_protocol', None): + return NotImplemented + if not getattr(cls, '_is_runtime_protocol', False): + if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']: + return NotImplemented + raise TypeError("Instance and class checks can only be used with" + " @runtime protocols") + if not _is_callable_members_only(cls): + if sys._getframe(2).f_globals['__name__'] in ['abc', 'functools']: + return NotImplemented + raise TypeError("Protocols with non-method members" + " don't support issubclass()") + if not isinstance(other, type): + # Same error as for issubclass(1, int) + raise TypeError('issubclass() arg 1 must be a class') + for attr in _get_protocol_attrs(cls): + for base in other.__mro__: + if attr in base.__dict__: + if base.__dict__[attr] is None: + return NotImplemented + break + annotations = getattr(base, '__annotations__', {}) + if (isinstance(annotations, typing.Mapping) and + attr in annotations and + isinstance(other, _ProtocolMeta) and + other._is_protocol): + break + else: + return NotImplemented + return True + if '__subclasshook__' not in cls.__dict__: + cls.__subclasshook__ = _proto_hook + + # We have nothing more to do for non-protocols. + if not cls._is_protocol: + return + + # Check consistency of bases. 
+ for base in cls.__bases__: + if not (base in (object, typing.Generic) or + base.__module__ == 'collections.abc' and + base.__name__ in _PROTO_WHITELIST or + isinstance(base, _ProtocolMeta) and base._is_protocol): + raise TypeError('Protocols can only inherit from other' + f' protocols, got {repr(base)}') + cls.__init__ = _no_init + + +# 3.8+ +if hasattr(typing, 'runtime_checkable'): + runtime_checkable = typing.runtime_checkable +# 3.7 +else: + def runtime_checkable(cls): + """Mark a protocol class as a runtime protocol, so that it + can be used with isinstance() and issubclass(). Raise TypeError + if applied to a non-protocol class. + + This allows a simple-minded structural check very similar to the + one-offs in collections.abc such as Hashable. + """ + if not isinstance(cls, _ProtocolMeta) or not cls._is_protocol: + raise TypeError('@runtime_checkable can be only applied to protocol classes,' + f' got {cls!r}') + cls._is_runtime_protocol = True + return cls + + +# Exists for backwards compatibility. +runtime = runtime_checkable + + +# 3.8+ +if hasattr(typing, 'SupportsIndex'): + SupportsIndex = typing.SupportsIndex +# 3.7 +else: + @runtime_checkable + class SupportsIndex(Protocol): + __slots__ = () + + @abc.abstractmethod + def __index__(self) -> int: + pass + + +if hasattr(typing, "Required"): + # The standard library TypedDict in Python 3.8 does not store runtime information + # about which (if any) keys are optional. See https://bugs.python.org/issue38834 + # The standard library TypedDict in Python 3.9.0/1 does not honour the "total" + # keyword with old-style TypedDict(). See https://bugs.python.org/issue42059 + # The standard library TypedDict below Python 3.11 does not store runtime + # information about optional and required keys when using Required or NotRequired. + # Generic TypedDicts are also impossible using typing.TypedDict on Python <3.11. 
+ TypedDict = typing.TypedDict + _TypedDictMeta = typing._TypedDictMeta + is_typeddict = typing.is_typeddict +else: + def _check_fails(cls, other): + try: + if sys._getframe(1).f_globals['__name__'] not in ['abc', + 'functools', + 'typing']: + # Typed dicts are only for static structural subtyping. + raise TypeError('TypedDict does not support instance and class checks') + except (AttributeError, ValueError): + pass + return False + + def _dict_new(*args, **kwargs): + if not args: + raise TypeError('TypedDict.__new__(): not enough arguments') + _, args = args[0], args[1:] # allow the "cls" keyword be passed + return dict(*args, **kwargs) + + _dict_new.__text_signature__ = '($cls, _typename, _fields=None, /, **kwargs)' + + def _typeddict_new(*args, total=True, **kwargs): + if not args: + raise TypeError('TypedDict.__new__(): not enough arguments') + _, args = args[0], args[1:] # allow the "cls" keyword be passed + if args: + typename, args = args[0], args[1:] # allow the "_typename" keyword be passed + elif '_typename' in kwargs: + typename = kwargs.pop('_typename') + import warnings + warnings.warn("Passing '_typename' as keyword argument is deprecated", + DeprecationWarning, stacklevel=2) + else: + raise TypeError("TypedDict.__new__() missing 1 required positional " + "argument: '_typename'") + if args: + try: + fields, = args # allow the "_fields" keyword be passed + except ValueError: + raise TypeError('TypedDict.__new__() takes from 2 to 3 ' + f'positional arguments but {len(args) + 2} ' + 'were given') + elif '_fields' in kwargs and len(kwargs) == 1: + fields = kwargs.pop('_fields') + import warnings + warnings.warn("Passing '_fields' as keyword argument is deprecated", + DeprecationWarning, stacklevel=2) + else: + fields = None + + if fields is None: + fields = kwargs + elif kwargs: + raise TypeError("TypedDict takes either a dict or keyword arguments," + " but not both") + + ns = {'__annotations__': dict(fields)} + try: + # Setting correct module is 
necessary to make typed dict classes pickleable. + ns['__module__'] = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + pass + + return _TypedDictMeta(typename, (), ns, total=total) + + _typeddict_new.__text_signature__ = ('($cls, _typename, _fields=None,' + ' /, *, total=True, **kwargs)') + + class _TypedDictMeta(type): + def __init__(cls, name, bases, ns, total=True): + super().__init__(name, bases, ns) + + def __new__(cls, name, bases, ns, total=True): + # Create new typed dict class object. + # This method is called directly when TypedDict is subclassed, + # or via _typeddict_new when TypedDict is instantiated. This way + # TypedDict supports all three syntaxes described in its docstring. + # Subclasses and instances of TypedDict return actual dictionaries + # via _dict_new. + ns['__new__'] = _typeddict_new if name == 'TypedDict' else _dict_new + # Don't insert typing.Generic into __bases__ here, + # or Generic.__init_subclass__ will raise TypeError + # in the super().__new__() call. + # Instead, monkey-patch __bases__ onto the class after it's been created. 
+ tp_dict = super().__new__(cls, name, (dict,), ns) + + if any(issubclass(base, typing.Generic) for base in bases): + tp_dict.__bases__ = (typing.Generic, dict) + _maybe_adjust_parameters(tp_dict) + + annotations = {} + own_annotations = ns.get('__annotations__', {}) + msg = "TypedDict('Name', {f0: t0, f1: t1, ...}); each t must be a type" + own_annotations = { + n: typing._type_check(tp, msg) for n, tp in own_annotations.items() + } + required_keys = set() + optional_keys = set() + + for base in bases: + annotations.update(base.__dict__.get('__annotations__', {})) + required_keys.update(base.__dict__.get('__required_keys__', ())) + optional_keys.update(base.__dict__.get('__optional_keys__', ())) + + annotations.update(own_annotations) + for annotation_key, annotation_type in own_annotations.items(): + annotation_origin = get_origin(annotation_type) + if annotation_origin is Annotated: + annotation_args = get_args(annotation_type) + if annotation_args: + annotation_type = annotation_args[0] + annotation_origin = get_origin(annotation_type) + + if annotation_origin is Required: + required_keys.add(annotation_key) + elif annotation_origin is NotRequired: + optional_keys.add(annotation_key) + elif total: + required_keys.add(annotation_key) + else: + optional_keys.add(annotation_key) + + tp_dict.__annotations__ = annotations + tp_dict.__required_keys__ = frozenset(required_keys) + tp_dict.__optional_keys__ = frozenset(optional_keys) + if not hasattr(tp_dict, '__total__'): + tp_dict.__total__ = total + return tp_dict + + __instancecheck__ = __subclasscheck__ = _check_fails + + TypedDict = _TypedDictMeta('TypedDict', (dict,), {}) + TypedDict.__module__ = __name__ + TypedDict.__doc__ = \ + """A simple typed name space. At runtime it is equivalent to a plain dict. + + TypedDict creates a dictionary type that expects all of its + instances to have a certain set of keys, with each key + associated with a value of a consistent type. 
This expectation + is not checked at runtime but is only enforced by type checkers. + Usage:: + + class Point2D(TypedDict): + x: int + y: int + label: str + + a: Point2D = {'x': 1, 'y': 2, 'label': 'good'} # OK + b: Point2D = {'z': 3, 'label': 'bad'} # Fails type check + + assert Point2D(x=1, y=2, label='first') == dict(x=1, y=2, label='first') + + The type info can be accessed via the Point2D.__annotations__ dict, and + the Point2D.__required_keys__ and Point2D.__optional_keys__ frozensets. + TypedDict supports two additional equivalent forms:: + + Point2D = TypedDict('Point2D', x=int, y=int, label=str) + Point2D = TypedDict('Point2D', {'x': int, 'y': int, 'label': str}) + + The class syntax is only supported in Python 3.6+, while two other + syntax forms work for Python 2.7 and 3.2+ + """ + + if hasattr(typing, "_TypedDictMeta"): + _TYPEDDICT_TYPES = (typing._TypedDictMeta, _TypedDictMeta) + else: + _TYPEDDICT_TYPES = (_TypedDictMeta,) + + def is_typeddict(tp): + """Check if an annotation is a TypedDict class + + For example:: + class Film(TypedDict): + title: str + year: int + + is_typeddict(Film) # => True + is_typeddict(Union[list, str]) # => False + """ + return isinstance(tp, tuple(_TYPEDDICT_TYPES)) + + +if hasattr(typing, "assert_type"): + assert_type = typing.assert_type + +else: + def assert_type(__val, __typ): + """Assert (to the type checker) that the value is of the given type. + + When the type checker encounters a call to assert_type(), it + emits an error if the value is not of the specified type:: + + def greet(name: str) -> None: + assert_type(name, str) # ok + assert_type(name, int) # type checker error + + At runtime this returns the first argument unchanged and otherwise + does nothing. 
+ """ + return __val + + +if hasattr(typing, "Required"): + get_type_hints = typing.get_type_hints +else: + import functools + import types + + # replaces _strip_annotations() + def _strip_extras(t): + """Strips Annotated, Required and NotRequired from a given type.""" + if isinstance(t, _AnnotatedAlias): + return _strip_extras(t.__origin__) + if hasattr(t, "__origin__") and t.__origin__ in (Required, NotRequired): + return _strip_extras(t.__args__[0]) + if isinstance(t, typing._GenericAlias): + stripped_args = tuple(_strip_extras(a) for a in t.__args__) + if stripped_args == t.__args__: + return t + return t.copy_with(stripped_args) + if hasattr(types, "GenericAlias") and isinstance(t, types.GenericAlias): + stripped_args = tuple(_strip_extras(a) for a in t.__args__) + if stripped_args == t.__args__: + return t + return types.GenericAlias(t.__origin__, stripped_args) + if hasattr(types, "UnionType") and isinstance(t, types.UnionType): + stripped_args = tuple(_strip_extras(a) for a in t.__args__) + if stripped_args == t.__args__: + return t + return functools.reduce(operator.or_, stripped_args) + + return t + + def get_type_hints(obj, globalns=None, localns=None, include_extras=False): + """Return type hints for an object. + + This is often the same as obj.__annotations__, but it handles + forward references encoded as string literals, adds Optional[t] if a + default value equal to None is set and recursively replaces all + 'Annotated[T, ...]', 'Required[T]' or 'NotRequired[T]' with 'T' + (unless 'include_extras=True'). + + The argument may be a module, class, method, or function. The annotations + are returned as a dictionary. For classes, annotations include also + inherited members. + + TypeError is raised if the argument is not of a type that can contain + annotations, and an empty dictionary is returned if no annotations are + present. 
+ + BEWARE -- the behavior of globalns and localns is counterintuitive + (unless you are familiar with how eval() and exec() work). The + search order is locals first, then globals. + + - If no dict arguments are passed, an attempt is made to use the + globals from obj (or the respective module's globals for classes), + and these are also used as the locals. If the object does not appear + to have globals, an empty dictionary is used. + + - If one dict argument is passed, it is used for both globals and + locals. + + - If two dict arguments are passed, they specify globals and + locals, respectively. + """ + if hasattr(typing, "Annotated"): + hint = typing.get_type_hints( + obj, globalns=globalns, localns=localns, include_extras=True + ) + else: + hint = typing.get_type_hints(obj, globalns=globalns, localns=localns) + if include_extras: + return hint + return {k: _strip_extras(t) for k, t in hint.items()} + + +# Python 3.9+ has PEP 593 (Annotated) +if hasattr(typing, 'Annotated'): + Annotated = typing.Annotated + # Not exported and not a public API, but needed for get_origin() and get_args() + # to work. + _AnnotatedAlias = typing._AnnotatedAlias +# 3.7-3.8 +else: + class _AnnotatedAlias(typing._GenericAlias, _root=True): + """Runtime representation of an annotated type. + + At its core 'Annotated[t, dec1, dec2, ...]' is an alias for the type 't' + with extra annotations. The alias behaves like a normal typing alias, + instantiating is the same as instantiating the underlying type, binding + it to types is also the same. 
+ """ + def __init__(self, origin, metadata): + if isinstance(origin, _AnnotatedAlias): + metadata = origin.__metadata__ + metadata + origin = origin.__origin__ + super().__init__(origin, origin) + self.__metadata__ = metadata + + def copy_with(self, params): + assert len(params) == 1 + new_type = params[0] + return _AnnotatedAlias(new_type, self.__metadata__) + + def __repr__(self): + return (f"typing_extensions.Annotated[{typing._type_repr(self.__origin__)}, " + f"{', '.join(repr(a) for a in self.__metadata__)}]") + + def __reduce__(self): + return operator.getitem, ( + Annotated, (self.__origin__,) + self.__metadata__ + ) + + def __eq__(self, other): + if not isinstance(other, _AnnotatedAlias): + return NotImplemented + if self.__origin__ != other.__origin__: + return False + return self.__metadata__ == other.__metadata__ + + def __hash__(self): + return hash((self.__origin__, self.__metadata__)) + + class Annotated: + """Add context specific metadata to a type. + + Example: Annotated[int, runtime_check.Unsigned] indicates to the + hypothetical runtime_check module that this type is an unsigned int. + Every other consumer of this type can ignore this metadata and treat + this type as int. + + The first argument to Annotated must be a valid type (and will be in + the __origin__ field), the remaining arguments are kept as a tuple in + the __extra__ field. + + Details: + + - It's an error to call `Annotated` with less than two arguments. 
+ - Nested Annotated are flattened:: + + Annotated[Annotated[T, Ann1, Ann2], Ann3] == Annotated[T, Ann1, Ann2, Ann3] + + - Instantiating an annotated type is equivalent to instantiating the + underlying type:: + + Annotated[C, Ann1](5) == C(5) + + - Annotated can be used as a generic type alias:: + + Optimized = Annotated[T, runtime.Optimize()] + Optimized[int] == Annotated[int, runtime.Optimize()] + + OptimizedList = Annotated[List[T], runtime.Optimize()] + OptimizedList[int] == Annotated[List[int], runtime.Optimize()] + """ + + __slots__ = () + + def __new__(cls, *args, **kwargs): + raise TypeError("Type Annotated cannot be instantiated.") + + @typing._tp_cache + def __class_getitem__(cls, params): + if not isinstance(params, tuple) or len(params) < 2: + raise TypeError("Annotated[...] should be used " + "with at least two arguments (a type and an " + "annotation).") + allowed_special_forms = (ClassVar, Final) + if get_origin(params[0]) in allowed_special_forms: + origin = params[0] + else: + msg = "Annotated[t, ...]: t must be a type." + origin = typing._type_check(params[0], msg) + metadata = tuple(params[1:]) + return _AnnotatedAlias(origin, metadata) + + def __init_subclass__(cls, *args, **kwargs): + raise TypeError( + f"Cannot subclass {cls.__module__}.Annotated" + ) + +# Python 3.8 has get_origin() and get_args() but those implementations aren't +# Annotated-aware, so we can't use those. Python 3.9's versions don't support +# ParamSpecArgs and ParamSpecKwargs, so only Python 3.10's versions will do. +if sys.version_info[:2] >= (3, 10): + get_origin = typing.get_origin + get_args = typing.get_args +# 3.7-3.9 +else: + try: + # 3.9+ + from typing import _BaseGenericAlias + except ImportError: + _BaseGenericAlias = typing._GenericAlias + try: + # 3.9+ + from typing import GenericAlias as _typing_GenericAlias + except ImportError: + _typing_GenericAlias = typing._GenericAlias + + def get_origin(tp): + """Get the unsubscripted version of a type. 
+ + This supports generic types, Callable, Tuple, Union, Literal, Final, ClassVar + and Annotated. Return None for unsupported types. Examples:: + + get_origin(Literal[42]) is Literal + get_origin(int) is None + get_origin(ClassVar[int]) is ClassVar + get_origin(Generic) is Generic + get_origin(Generic[T]) is Generic + get_origin(Union[T, int]) is Union + get_origin(List[Tuple[T, T]][int]) == list + get_origin(P.args) is P + """ + if isinstance(tp, _AnnotatedAlias): + return Annotated + if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias, _BaseGenericAlias, + ParamSpecArgs, ParamSpecKwargs)): + return tp.__origin__ + if tp is typing.Generic: + return typing.Generic + return None + + def get_args(tp): + """Get type arguments with all substitutions performed. + + For unions, basic simplifications used by Union constructor are performed. + Examples:: + get_args(Dict[str, int]) == (str, int) + get_args(int) == () + get_args(Union[int, Union[T, int], str][int]) == (int, str) + get_args(Union[int, Tuple[T, int]][str]) == (int, Tuple[str, int]) + get_args(Callable[[], T][int]) == ([], int) + """ + if isinstance(tp, _AnnotatedAlias): + return (tp.__origin__,) + tp.__metadata__ + if isinstance(tp, (typing._GenericAlias, _typing_GenericAlias)): + if getattr(tp, "_special", False): + return () + res = tp.__args__ + if get_origin(tp) is collections.abc.Callable and res[0] is not Ellipsis: + res = (list(res[:-1]), res[-1]) + return res + return () + + +# 3.10+ +if hasattr(typing, 'TypeAlias'): + TypeAlias = typing.TypeAlias +# 3.9 +elif sys.version_info[:2] >= (3, 9): + class _TypeAliasForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_TypeAliasForm + def TypeAlias(self, parameters): + """Special marker indicating that an assignment should + be recognized as a proper type alias definition by type + checkers. 
+ + For example:: + + Predicate: TypeAlias = Callable[..., bool] + + It's invalid when used anywhere except as in the example above. + """ + raise TypeError(f"{self} is not subscriptable") +# 3.7-3.8 +else: + class _TypeAliasForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + TypeAlias = _TypeAliasForm('TypeAlias', + doc="""Special marker indicating that an assignment should + be recognized as a proper type alias definition by type + checkers. + + For example:: + + Predicate: TypeAlias = Callable[..., bool] + + It's invalid when used anywhere except as in the example + above.""") + + +class _DefaultMixin: + """Mixin for TypeVarLike defaults.""" + + __slots__ = () + + def __init__(self, default): + if isinstance(default, (tuple, list)): + self.__default__ = tuple((typing._type_check(d, "Default must be a type") + for d in default)) + elif default: + self.__default__ = typing._type_check(default, "Default must be a type") + else: + self.__default__ = None + + +# Add default and infer_variance parameters from PEP 696 and 695 +class TypeVar(typing.TypeVar, _DefaultMixin, _root=True): + """Type variable.""" + + __module__ = 'typing' + + def __init__(self, name, *constraints, bound=None, + covariant=False, contravariant=False, + default=None, infer_variance=False): + super().__init__(name, *constraints, bound=bound, covariant=covariant, + contravariant=contravariant) + _DefaultMixin.__init__(self, default) + self.__infer_variance__ = infer_variance + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + + +# Python 3.10+ has PEP 612 +if hasattr(typing, 'ParamSpecArgs'): + ParamSpecArgs = typing.ParamSpecArgs + ParamSpecKwargs = typing.ParamSpecKwargs +# 3.7-3.9 +else: + class _Immutable: + """Mixin to indicate that object should not be copied.""" + __slots__ 
= () + + def __copy__(self): + return self + + def __deepcopy__(self, memo): + return self + + class ParamSpecArgs(_Immutable): + """The args for a ParamSpec object. + + Given a ParamSpec object P, P.args is an instance of ParamSpecArgs. + + ParamSpecArgs objects have a reference back to their ParamSpec: + + P.args.__origin__ is P + + This type is meant for runtime introspection and has no special meaning to + static type checkers. + """ + def __init__(self, origin): + self.__origin__ = origin + + def __repr__(self): + return f"{self.__origin__.__name__}.args" + + def __eq__(self, other): + if not isinstance(other, ParamSpecArgs): + return NotImplemented + return self.__origin__ == other.__origin__ + + class ParamSpecKwargs(_Immutable): + """The kwargs for a ParamSpec object. + + Given a ParamSpec object P, P.kwargs is an instance of ParamSpecKwargs. + + ParamSpecKwargs objects have a reference back to their ParamSpec: + + P.kwargs.__origin__ is P + + This type is meant for runtime introspection and has no special meaning to + static type checkers. 
+ """ + def __init__(self, origin): + self.__origin__ = origin + + def __repr__(self): + return f"{self.__origin__.__name__}.kwargs" + + def __eq__(self, other): + if not isinstance(other, ParamSpecKwargs): + return NotImplemented + return self.__origin__ == other.__origin__ + +# 3.10+ +if hasattr(typing, 'ParamSpec'): + + # Add default Parameter - PEP 696 + class ParamSpec(typing.ParamSpec, _DefaultMixin, _root=True): + """Parameter specification variable.""" + + __module__ = 'typing' + + def __init__(self, name, *, bound=None, covariant=False, contravariant=False, + default=None): + super().__init__(name, bound=bound, covariant=covariant, + contravariant=contravariant) + _DefaultMixin.__init__(self, default) + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + +# 3.7-3.9 +else: + + # Inherits from list as a workaround for Callable checks in Python < 3.9.2. + class ParamSpec(list, _DefaultMixin): + """Parameter specification variable. + + Usage:: + + P = ParamSpec('P') + + Parameter specification variables exist primarily for the benefit of static + type checkers. They are used to forward the parameter types of one + callable to another callable, a pattern commonly found in higher order + functions and decorators. They are only valid when used in ``Concatenate``, + or s the first argument to ``Callable``. In Python 3.10 and higher, + they are also supported in user-defined Generics at runtime. + See class Generic for more information on generic types. 
An + example for annotating a decorator:: + + T = TypeVar('T') + P = ParamSpec('P') + + def add_logging(f: Callable[P, T]) -> Callable[P, T]: + '''A type-safe decorator to add logging to a function.''' + def inner(*args: P.args, **kwargs: P.kwargs) -> T: + logging.info(f'{f.__name__} was called') + return f(*args, **kwargs) + return inner + + @add_logging + def add_two(x: float, y: float) -> float: + '''Add two numbers together.''' + return x + y + + Parameter specification variables defined with covariant=True or + contravariant=True can be used to declare covariant or contravariant + generic types. These keyword arguments are valid, but their actual semantics + are yet to be decided. See PEP 612 for details. + + Parameter specification variables can be introspected. e.g.: + + P.__name__ == 'T' + P.__bound__ == None + P.__covariant__ == False + P.__contravariant__ == False + + Note that only parameter specification variables defined in global scope can + be pickled. + """ + + # Trick Generic __parameters__. 
+ __class__ = typing.TypeVar + + @property + def args(self): + return ParamSpecArgs(self) + + @property + def kwargs(self): + return ParamSpecKwargs(self) + + def __init__(self, name, *, bound=None, covariant=False, contravariant=False, + default=None): + super().__init__([self]) + self.__name__ = name + self.__covariant__ = bool(covariant) + self.__contravariant__ = bool(contravariant) + if bound: + self.__bound__ = typing._type_check(bound, 'Bound must be a type.') + else: + self.__bound__ = None + _DefaultMixin.__init__(self, default) + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + + def __repr__(self): + if self.__covariant__: + prefix = '+' + elif self.__contravariant__: + prefix = '-' + else: + prefix = '~' + return prefix + self.__name__ + + def __hash__(self): + return object.__hash__(self) + + def __eq__(self, other): + return self is other + + def __reduce__(self): + return self.__name__ + + # Hack to get typing._type_check to pass. + def __call__(self, *args, **kwargs): + pass + + +# 3.7-3.9 +if not hasattr(typing, 'Concatenate'): + # Inherits from list as a workaround for Callable checks in Python < 3.9.2. + class _ConcatenateGenericAlias(list): + + # Trick Generic into looking into this for __parameters__. + __class__ = typing._GenericAlias + + # Flag in 3.8. + _special = False + + def __init__(self, origin, args): + super().__init__(args) + self.__origin__ = origin + self.__args__ = args + + def __repr__(self): + _type_repr = typing._type_repr + return (f'{_type_repr(self.__origin__)}' + f'[{", ".join(_type_repr(arg) for arg in self.__args__)}]') + + def __hash__(self): + return hash((self.__origin__, self.__args__)) + + # Hack to get typing._type_check to pass in Generic. 
+ def __call__(self, *args, **kwargs): + pass + + @property + def __parameters__(self): + return tuple( + tp for tp in self.__args__ if isinstance(tp, (typing.TypeVar, ParamSpec)) + ) + + +# 3.7-3.9 +@typing._tp_cache +def _concatenate_getitem(self, parameters): + if parameters == (): + raise TypeError("Cannot take a Concatenate of no types.") + if not isinstance(parameters, tuple): + parameters = (parameters,) + if not isinstance(parameters[-1], ParamSpec): + raise TypeError("The last parameter to Concatenate should be a " + "ParamSpec variable.") + msg = "Concatenate[arg, ...]: each arg must be a type." + parameters = tuple(typing._type_check(p, msg) for p in parameters) + return _ConcatenateGenericAlias(self, parameters) + + +# 3.10+ +if hasattr(typing, 'Concatenate'): + Concatenate = typing.Concatenate + _ConcatenateGenericAlias = typing._ConcatenateGenericAlias # noqa +# 3.9 +elif sys.version_info[:2] >= (3, 9): + @_TypeAliasForm + def Concatenate(self, parameters): + """Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a + higher order function which adds, removes or transforms parameters of a + callable. + + For example:: + + Callable[Concatenate[int, P], int] + + See PEP 612 for detailed information. + """ + return _concatenate_getitem(self, parameters) +# 3.7-8 +else: + class _ConcatenateForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + return _concatenate_getitem(self, parameters) + + Concatenate = _ConcatenateForm( + 'Concatenate', + doc="""Used in conjunction with ``ParamSpec`` and ``Callable`` to represent a + higher order function which adds, removes or transforms parameters of a + callable. + + For example:: + + Callable[Concatenate[int, P], int] + + See PEP 612 for detailed information. 
+ """) + +# 3.10+ +if hasattr(typing, 'TypeGuard'): + TypeGuard = typing.TypeGuard +# 3.9 +elif sys.version_info[:2] >= (3, 9): + class _TypeGuardForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_TypeGuardForm + def TypeGuard(self, parameters): + """Special typing form used to annotate the return type of a user-defined + type guard function. ``TypeGuard`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean. + + ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. The + conditional expression here is sometimes referred to as a "type guard". + + Sometimes it would be convenient to use a user-defined boolean function + as a type guard. Such a function should use ``TypeGuard[...]`` as its + return type to alert static type checkers to this intention. + + Using ``-> TypeGuard`` tells the static type checker that for a given + function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the type inside ``TypeGuard``. + + For example:: + + def is_str(val: Union[str, float]): + # "isinstance" type guard + if isinstance(val, str): + # Type of ``val`` is narrowed to ``str`` + ... + else: + # Else, type of ``val`` is narrowed to ``float``. + ... + + Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower + form of ``TypeA`` (it can even be a wider form) and this may lead to + type-unsafe results. The main reason is to allow for things like + narrowing ``List[object]`` to ``List[str]`` even though the latter is not + a subtype of the former, since ``List`` is invariant. The responsibility of + writing type-safe type guards is left to the user. 
+ + ``TypeGuard`` also works with type variables. For more information, see + PEP 647 (User-Defined Type Guards). + """ + item = typing._type_check(parameters, f'{self} accepts only a single type.') + return typing._GenericAlias(self, (item,)) +# 3.7-3.8 +else: + class _TypeGuardForm(typing._SpecialForm, _root=True): + + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type') + return typing._GenericAlias(self, (item,)) + + TypeGuard = _TypeGuardForm( + 'TypeGuard', + doc="""Special typing form used to annotate the return type of a user-defined + type guard function. ``TypeGuard`` only accepts a single type argument. + At runtime, functions marked this way should return a boolean. + + ``TypeGuard`` aims to benefit *type narrowing* -- a technique used by static + type checkers to determine a more precise type of an expression within a + program's code flow. Usually type narrowing is done by analyzing + conditional code flow and applying the narrowing to a block of code. The + conditional expression here is sometimes referred to as a "type guard". + + Sometimes it would be convenient to use a user-defined boolean function + as a type guard. Such a function should use ``TypeGuard[...]`` as its + return type to alert static type checkers to this intention. + + Using ``-> TypeGuard`` tells the static type checker that for a given + function: + + 1. The return value is a boolean. + 2. If the return value is ``True``, the type of its argument + is the type inside ``TypeGuard``. + + For example:: + + def is_str(val: Union[str, float]): + # "isinstance" type guard + if isinstance(val, str): + # Type of ``val`` is narrowed to ``str`` + ... + else: + # Else, type of ``val`` is narrowed to ``float``. + ... 
+ + Strict type narrowing is not enforced -- ``TypeB`` need not be a narrower + form of ``TypeA`` (it can even be a wider form) and this may lead to + type-unsafe results. The main reason is to allow for things like + narrowing ``List[object]`` to ``List[str]`` even though the latter is not + a subtype of the former, since ``List`` is invariant. The responsibility of + writing type-safe type guards is left to the user. + + ``TypeGuard`` also works with type variables. For more information, see + PEP 647 (User-Defined Type Guards). + """) + + +# Vendored from cpython typing._SpecialFrom +class _SpecialForm(typing._Final, _root=True): + __slots__ = ('_name', '__doc__', '_getitem') + + def __init__(self, getitem): + self._getitem = getitem + self._name = getitem.__name__ + self.__doc__ = getitem.__doc__ + + def __getattr__(self, item): + if item in {'__name__', '__qualname__'}: + return self._name + + raise AttributeError(item) + + def __mro_entries__(self, bases): + raise TypeError(f"Cannot subclass {self!r}") + + def __repr__(self): + return f'typing_extensions.{self._name}' + + def __reduce__(self): + return self._name + + def __call__(self, *args, **kwds): + raise TypeError(f"Cannot instantiate {self!r}") + + def __or__(self, other): + return typing.Union[self, other] + + def __ror__(self, other): + return typing.Union[other, self] + + def __instancecheck__(self, obj): + raise TypeError(f"{self} cannot be used with isinstance()") + + def __subclasscheck__(self, cls): + raise TypeError(f"{self} cannot be used with issubclass()") + + @typing._tp_cache + def __getitem__(self, parameters): + return self._getitem(self, parameters) + + +if hasattr(typing, "LiteralString"): + LiteralString = typing.LiteralString +else: + @_SpecialForm + def LiteralString(self, params): + """Represents an arbitrary literal string. + + Example:: + + from typing_extensions import LiteralString + + def query(sql: LiteralString) -> ...: + ... 
+ + query("SELECT * FROM table") # ok + query(f"SELECT * FROM {input()}") # not ok + + See PEP 675 for details. + + """ + raise TypeError(f"{self} is not subscriptable") + + +if hasattr(typing, "Self"): + Self = typing.Self +else: + @_SpecialForm + def Self(self, params): + """Used to spell the type of "self" in classes. + + Example:: + + from typing import Self + + class ReturnsSelf: + def parse(self, data: bytes) -> Self: + ... + return self + + """ + + raise TypeError(f"{self} is not subscriptable") + + +if hasattr(typing, "Never"): + Never = typing.Never +else: + @_SpecialForm + def Never(self, params): + """The bottom type, a type that has no members. + + This can be used to define a function that should never be + called, or a function that never returns:: + + from typing_extensions import Never + + def never_call_me(arg: Never) -> None: + pass + + def int_or_str(arg: int | str) -> None: + never_call_me(arg) # type checker error + match arg: + case int(): + print("It's an int") + case str(): + print("It's a str") + case _: + never_call_me(arg) # ok, arg is of type Never + + """ + + raise TypeError(f"{self} is not subscriptable") + + +if hasattr(typing, 'Required'): + Required = typing.Required + NotRequired = typing.NotRequired +elif sys.version_info[:2] >= (3, 9): + class _ExtensionsSpecialForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + @_ExtensionsSpecialForm + def Required(self, parameters): + """A special typing construct to mark a key of a total=False TypedDict + as required. For example: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. 
+ """ + item = typing._type_check(parameters, f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + + @_ExtensionsSpecialForm + def NotRequired(self, parameters): + """A special typing construct to mark a key of a TypedDict as + potentially missing. For example: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """ + item = typing._type_check(parameters, f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + +else: + class _RequiredForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type.') + return typing._GenericAlias(self, (item,)) + + Required = _RequiredForm( + 'Required', + doc="""A special typing construct to mark a key of a total=False TypedDict + as required. For example: + + class Movie(TypedDict, total=False): + title: Required[str] + year: int + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + + There is no runtime checking that a required key is actually provided + when instantiating a related TypedDict. + """) + NotRequired = _RequiredForm( + 'NotRequired', + doc="""A special typing construct to mark a key of a TypedDict as + potentially missing. For example: + + class Movie(TypedDict): + title: str + year: NotRequired[int] + + m = Movie( + title='The Matrix', # typechecker error if key is omitted + year=1999, + ) + """) + + +if hasattr(typing, "Unpack"): # 3.11+ + Unpack = typing.Unpack +elif sys.version_info[:2] >= (3, 9): + class _UnpackSpecialForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' 
+ self._name + + class _UnpackAlias(typing._GenericAlias, _root=True): + __class__ = typing.TypeVar + + @_UnpackSpecialForm + def Unpack(self, parameters): + """A special typing construct to unpack a variadic type. For example: + + Shape = TypeVarTuple('Shape') + Batch = NewType('Batch', int) + + def add_batch_axis( + x: Array[Unpack[Shape]] + ) -> Array[Batch, Unpack[Shape]]: ... + + """ + item = typing._type_check(parameters, f'{self._name} accepts only a single type.') + return _UnpackAlias(self, (item,)) + + def _is_unpack(obj): + return isinstance(obj, _UnpackAlias) + +else: + class _UnpackAlias(typing._GenericAlias, _root=True): + __class__ = typing.TypeVar + + class _UnpackForm(typing._SpecialForm, _root=True): + def __repr__(self): + return 'typing_extensions.' + self._name + + def __getitem__(self, parameters): + item = typing._type_check(parameters, + f'{self._name} accepts only a single type.') + return _UnpackAlias(self, (item,)) + + Unpack = _UnpackForm( + 'Unpack', + doc="""A special typing construct to unpack a variadic type. For example: + + Shape = TypeVarTuple('Shape') + Batch = NewType('Batch', int) + + def add_batch_axis( + x: Array[Unpack[Shape]] + ) -> Array[Batch, Unpack[Shape]]: ... + + """) + + def _is_unpack(obj): + return isinstance(obj, _UnpackAlias) + + +if hasattr(typing, "TypeVarTuple"): # 3.11+ + + # Add default Parameter - PEP 696 + class TypeVarTuple(typing.TypeVarTuple, _DefaultMixin, _root=True): + """Type variable tuple.""" + + def __init__(self, name, *, default=None): + super().__init__(name) + _DefaultMixin.__init__(self, default) + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + +else: + class TypeVarTuple(_DefaultMixin): + """Type variable tuple. 
+ + Usage:: + + Ts = TypeVarTuple('Ts') + + In the same way that a normal type variable is a stand-in for a single + type such as ``int``, a type variable *tuple* is a stand-in for a *tuple* + type such as ``Tuple[int, str]``. + + Type variable tuples can be used in ``Generic`` declarations. + Consider the following example:: + + class Array(Generic[*Ts]): ... + + The ``Ts`` type variable tuple here behaves like ``tuple[T1, T2]``, + where ``T1`` and ``T2`` are type variables. To use these type variables + as type parameters of ``Array``, we must *unpack* the type variable tuple using + the star operator: ``*Ts``. The signature of ``Array`` then behaves + as if we had simply written ``class Array(Generic[T1, T2]): ...``. + In contrast to ``Generic[T1, T2]``, however, ``Generic[*Shape]`` allows + us to parameterise the class with an *arbitrary* number of type parameters. + + Type variable tuples can be used anywhere a normal ``TypeVar`` can. + This includes class definitions, as shown above, as well as function + signatures and variable annotations:: + + class Array(Generic[*Ts]): + + def __init__(self, shape: Tuple[*Ts]): + self._shape: Tuple[*Ts] = shape + + def get_shape(self) -> Tuple[*Ts]: + return self._shape + + shape = (Height(480), Width(640)) + x: Array[Height, Width] = Array(shape) + y = abs(x) # Inferred type is Array[Height, Width] + z = x + x # ... is Array[Height, Width] + x.get_shape() # ... is tuple[Height, Width] + + """ + + # Trick Generic __parameters__. 
+ __class__ = typing.TypeVar + + def __iter__(self): + yield self.__unpacked__ + + def __init__(self, name, *, default=None): + self.__name__ = name + _DefaultMixin.__init__(self, default) + + # for pickling: + try: + def_mod = sys._getframe(1).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): + def_mod = None + if def_mod != 'typing_extensions': + self.__module__ = def_mod + + self.__unpacked__ = Unpack[self] + + def __repr__(self): + return self.__name__ + + def __hash__(self): + return object.__hash__(self) + + def __eq__(self, other): + return self is other + + def __reduce__(self): + return self.__name__ + + def __init_subclass__(self, *args, **kwds): + if '_root' not in kwds: + raise TypeError("Cannot subclass special typing classes") + + +if hasattr(typing, "reveal_type"): + reveal_type = typing.reveal_type +else: + def reveal_type(__obj: T) -> T: + """Reveal the inferred type of a variable. + + When a static type checker encounters a call to ``reveal_type()``, + it will emit the inferred type of the argument:: + + x: int = 1 + reveal_type(x) + + Running a static type checker (e.g., ``mypy``) on this example + will produce output similar to 'Revealed type is "builtins.int"'. + + At runtime, the function prints the runtime type of the + argument and returns it unchanged. + + """ + print(f"Runtime type is {type(__obj).__name__!r}", file=sys.stderr) + return __obj + + +if hasattr(typing, "assert_never"): + assert_never = typing.assert_never +else: + def assert_never(__arg: Never) -> Never: + """Assert to the type checker that a line of code is unreachable. + + Example:: + + def int_or_str(arg: int | str) -> None: + match arg: + case int(): + print("It's an int") + case str(): + print("It's a str") + case _: + assert_never(arg) + + If a type checker finds that a call to assert_never() is + reachable, it will emit an error. + + At runtime, this throws an exception when called. 
+ + """ + raise AssertionError("Expected code to be unreachable") + + +if hasattr(typing, 'dataclass_transform'): + dataclass_transform = typing.dataclass_transform +else: + def dataclass_transform( + *, + eq_default: bool = True, + order_default: bool = False, + kw_only_default: bool = False, + field_specifiers: typing.Tuple[ + typing.Union[typing.Type[typing.Any], typing.Callable[..., typing.Any]], + ... + ] = (), + **kwargs: typing.Any, + ) -> typing.Callable[[T], T]: + """Decorator that marks a function, class, or metaclass as providing + dataclass-like behavior. + + Example: + + from typing_extensions import dataclass_transform + + _T = TypeVar("_T") + + # Used on a decorator function + @dataclass_transform() + def create_model(cls: type[_T]) -> type[_T]: + ... + return cls + + @create_model + class CustomerModel: + id: int + name: str + + # Used on a base class + @dataclass_transform() + class ModelBase: ... + + class CustomerModel(ModelBase): + id: int + name: str + + # Used on a metaclass + @dataclass_transform() + class ModelMeta(type): ... + + class ModelBase(metaclass=ModelMeta): ... + + class CustomerModel(ModelBase): + id: int + name: str + + Each of the ``CustomerModel`` classes defined in this example will now + behave similarly to a dataclass created with the ``@dataclasses.dataclass`` + decorator. For example, the type checker will synthesize an ``__init__`` + method. + + The arguments to this decorator can be used to customize this behavior: + - ``eq_default`` indicates whether the ``eq`` parameter is assumed to be + True or False if it is omitted by the caller. + - ``order_default`` indicates whether the ``order`` parameter is + assumed to be True or False if it is omitted by the caller. + - ``kw_only_default`` indicates whether the ``kw_only`` parameter is + assumed to be True or False if it is omitted by the caller. 
+ - ``field_specifiers`` specifies a static list of supported classes + or functions that describe fields, similar to ``dataclasses.field()``. + + At runtime, this decorator records its arguments in the + ``__dataclass_transform__`` attribute on the decorated object. + + See PEP 681 for details. + + """ + def decorator(cls_or_fn): + cls_or_fn.__dataclass_transform__ = { + "eq_default": eq_default, + "order_default": order_default, + "kw_only_default": kw_only_default, + "field_specifiers": field_specifiers, + "kwargs": kwargs, + } + return cls_or_fn + return decorator + + +if hasattr(typing, "override"): + override = typing.override +else: + _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any]) + + def override(__arg: _F) -> _F: + """Indicate that a method is intended to override a method in a base class. + + Usage: + + class Base: + def method(self) -> None: ... + pass + + class Child(Base): + @override + def method(self) -> None: + super().method() + + When this decorator is applied to a method, the type checker will + validate that it overrides a method with the same name on a base class. + This helps prevent bugs that may occur when a base class is changed + without an equivalent change to a child class. + + See PEP 698 for details. + + """ + return __arg + + +# We have to do some monkey patching to deal with the dual nature of +# Unpack/TypeVarTuple: +# - We want Unpack to be a kind of TypeVar so it gets accepted in +# Generic[Unpack[Ts]] +# - We want it to *not* be treated as a TypeVar for the purposes of +# counting generic parameters, so that when we subscript a generic, +# the runtime doesn't try to substitute the Unpack with the subscripted type. +if not hasattr(typing, "TypeVarTuple"): + typing._collect_type_vars = _collect_type_vars + typing._check_generic = _check_generic + + +# Backport typing.NamedTuple as it exists in Python 3.11. +# In 3.11, the ability to define generic `NamedTuple`s was supported. 
+# This was explicitly disallowed in 3.9-3.10, and only half-worked in <=3.8. +if sys.version_info >= (3, 11): + NamedTuple = typing.NamedTuple +else: + def _caller(): + try: + return sys._getframe(2).f_globals.get('__name__', '__main__') + except (AttributeError, ValueError): # For platforms without _getframe() + return None + + def _make_nmtuple(name, types, module, defaults=()): + fields = [n for n, t in types] + annotations = {n: typing._type_check(t, f"field {n} annotation must be a type") + for n, t in types} + nm_tpl = collections.namedtuple(name, fields, + defaults=defaults, module=module) + nm_tpl.__annotations__ = nm_tpl.__new__.__annotations__ = annotations + # The `_field_types` attribute was removed in 3.9; + # in earlier versions, it is the same as the `__annotations__` attribute + if sys.version_info < (3, 9): + nm_tpl._field_types = annotations + return nm_tpl + + _prohibited_namedtuple_fields = typing._prohibited + _special_namedtuple_fields = frozenset({'__module__', '__name__', '__annotations__'}) + + class _NamedTupleMeta(type): + def __new__(cls, typename, bases, ns): + assert _NamedTuple in bases + for base in bases: + if base is not _NamedTuple and base is not typing.Generic: + raise TypeError( + 'can only inherit from a NamedTuple type and Generic') + bases = tuple(tuple if base is _NamedTuple else base for base in bases) + types = ns.get('__annotations__', {}) + default_names = [] + for field_name in types: + if field_name in ns: + default_names.append(field_name) + elif default_names: + raise TypeError(f"Non-default namedtuple field {field_name} " + f"cannot follow default field" + f"{'s' if len(default_names) > 1 else ''} " + f"{', '.join(default_names)}") + nm_tpl = _make_nmtuple( + typename, types.items(), + defaults=[ns[n] for n in default_names], + module=ns['__module__'] + ) + nm_tpl.__bases__ = bases + if typing.Generic in bases: + class_getitem = typing.Generic.__class_getitem__.__func__ + nm_tpl.__class_getitem__ = 
classmethod(class_getitem) + # update from user namespace without overriding special namedtuple attributes + for key in ns: + if key in _prohibited_namedtuple_fields: + raise AttributeError("Cannot overwrite NamedTuple attribute " + key) + elif key not in _special_namedtuple_fields and key not in nm_tpl._fields: + setattr(nm_tpl, key, ns[key]) + if typing.Generic in bases: + nm_tpl.__init_subclass__() + return nm_tpl + + def NamedTuple(__typename, __fields=None, **kwargs): + if __fields is None: + __fields = kwargs.items() + elif kwargs: + raise TypeError("Either list of fields or keywords" + " can be provided to NamedTuple, not both") + return _make_nmtuple(__typename, __fields, module=_caller()) + + NamedTuple.__doc__ = typing.NamedTuple.__doc__ + _NamedTuple = type.__new__(_NamedTupleMeta, 'NamedTuple', (), {}) + + # On 3.8+, alter the signature so that it matches typing.NamedTuple. + # The signature of typing.NamedTuple on >=3.8 is invalid syntax in Python 3.7, + # so just leave the signature as it is on 3.7. + if sys.version_info >= (3, 8): + NamedTuple.__text_signature__ = '(typename, fields=None, /, **kwargs)' + + def _namedtuple_mro_entries(bases): + assert NamedTuple in bases + return (_NamedTuple,) + + NamedTuple.__mro_entries__ = _namedtuple_mro_entries diff --git a/third_party/python/setuptools/pkg_resources/_vendor/zipp.py b/third_party/python/setuptools/pkg_resources/_vendor/zipp.py new file mode 100644 index 0000000000..26b723c1fd --- /dev/null +++ b/third_party/python/setuptools/pkg_resources/_vendor/zipp.py @@ -0,0 +1,329 @@ +import io +import posixpath +import zipfile +import itertools +import contextlib +import sys +import pathlib + +if sys.version_info < (3, 7): + from collections import OrderedDict +else: + OrderedDict = dict + + +__all__ = ['Path'] + + +def _parents(path): + """ + Given a path with elements separated by + posixpath.sep, generate all parents of that path. 
+ + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] + """ + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) + + +_dedupe = OrderedDict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. + """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + +class CompleteDirs(zipfile.ZipFile): + """ + A ZipFile subclass that ensures that implied directories + are always included in the namelist. + """ + + @staticmethod + def _implied_dirs(names): + parents = itertools.chain.from_iterable(map(_parents, names)) + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) + + def namelist(self): + names = super(CompleteDirs, self).namelist() + return names + list(self._implied_dirs(names)) + + def _name_set(self): + return set(self.namelist()) + + def resolve_dir(self, name): + """ + If the name represents a directory, return that name + as a directory (with the trailing slash). + """ + names = self._name_set() + dirname = name + '/' + dir_match = name not in names and dirname in names + return dirname if dir_match else name + + @classmethod + def make(cls, source): + """ + Given a source (filename or zipfile), return an + appropriate CompleteDirs subclass. 
+ """ + if isinstance(source, CompleteDirs): + return source + + if not isinstance(source, zipfile.ZipFile): + return cls(_pathlib_compat(source)) + + # Only allow for FastLookup when supplied zipfile is read-only + if 'r' not in source.mode: + cls = CompleteDirs + + source.__class__ = cls + return source + + +class FastLookup(CompleteDirs): + """ + ZipFile subclass to ensure implicit + dirs exist and are resolved rapidly. + """ + + def namelist(self): + with contextlib.suppress(AttributeError): + return self.__names + self.__names = super(FastLookup, self).namelist() + return self.__names + + def _name_set(self): + with contextlib.suppress(AttributeError): + return self.__lookup + self.__lookup = super(FastLookup, self)._name_set() + return self.__lookup + + +def _pathlib_compat(path): + """ + For path-like objects, convert to a filename for compatibility + on Python 3.6.1 and earlier. + """ + try: + return path.__fspath__() + except AttributeError: + return str(path) + + +class Path: + """ + A pathlib-compatible interface for zip files. + + Consider a zip file with this structure:: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + + >>> data = io.BytesIO() + >>> zf = zipfile.ZipFile(data, 'w') + >>> zf.writestr('a.txt', 'content of a') + >>> zf.writestr('b/c.txt', 'content of c') + >>> zf.writestr('b/d/e.txt', 'content of e') + >>> zf.filename = 'mem/abcde.zip' + + Path accepts the zipfile object itself or a filename + + >>> root = Path(zf) + + From there, several path operations are available. 
+ + Directory iteration (including the zip file itself): + + >>> a, b = root.iterdir() + >>> a + Path('mem/abcde.zip', 'a.txt') + >>> b + Path('mem/abcde.zip', 'b/') + + name property: + + >>> b.name + 'b' + + join with divide operator: + + >>> c = b / 'c.txt' + >>> c + Path('mem/abcde.zip', 'b/c.txt') + >>> c.name + 'c.txt' + + Read text: + + >>> c.read_text() + 'content of c' + + existence: + + >>> c.exists() + True + >>> (b / 'missing.txt').exists() + False + + Coercion to string: + + >>> import os + >>> str(c).replace(os.sep, posixpath.sep) + 'mem/abcde.zip/b/c.txt' + + At the root, ``name``, ``filename``, and ``parent`` + resolve to the zipfile. Note these attributes are not + valid and will raise a ``ValueError`` if the zipfile + has no filename. + + >>> root.name + 'abcde.zip' + >>> str(root.filename).replace(os.sep, posixpath.sep) + 'mem/abcde.zip' + >>> str(root.parent) + 'mem' + """ + + __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" + + def __init__(self, root, at=""): + """ + Construct a Path from a ZipFile or filename. + + Note: When the source is an existing ZipFile object, + its type (__class__) will be mutated to a + specialized type. If the caller wishes to retain the + original type, the caller should either create a + separate ZipFile object or pass a filename. + """ + self.root = FastLookup.make(root) + self.at = at + + def open(self, mode='r', *args, pwd=None, **kwargs): + """ + Open this entry as text or binary following the semantics + of ``pathlib.Path.open()`` by passing arguments through + to io.TextIOWrapper(). 
+ """ + if self.is_dir(): + raise IsADirectoryError(self) + zip_mode = mode[0] + if not self.exists() and zip_mode == 'r': + raise FileNotFoundError(self) + stream = self.root.open(self.at, zip_mode, pwd=pwd) + if 'b' in mode: + if args or kwargs: + raise ValueError("encoding args invalid for binary operation") + return stream + return io.TextIOWrapper(stream, *args, **kwargs) + + @property + def name(self): + return pathlib.Path(self.at).name or self.filename.name + + @property + def suffix(self): + return pathlib.Path(self.at).suffix or self.filename.suffix + + @property + def suffixes(self): + return pathlib.Path(self.at).suffixes or self.filename.suffixes + + @property + def stem(self): + return pathlib.Path(self.at).stem or self.filename.stem + + @property + def filename(self): + return pathlib.Path(self.root.filename).joinpath(self.at) + + def read_text(self, *args, **kwargs): + with self.open('r', *args, **kwargs) as strm: + return strm.read() + + def read_bytes(self): + with self.open('rb') as strm: + return strm.read() + + def _is_child(self, path): + return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") + + def _next(self, at): + return self.__class__(self.root, at) + + def is_dir(self): + return not self.at or self.at.endswith("/") + + def is_file(self): + return self.exists() and not self.is_dir() + + def exists(self): + return self.at in self.root._name_set() + + def iterdir(self): + if not self.is_dir(): + raise ValueError("Can't listdir a file") + subs = map(self._next, self.root.namelist()) + return filter(self._is_child, subs) + + def __str__(self): + return posixpath.join(self.root.filename, self.at) + + def __repr__(self): + return self.__repr.format(self=self) + + def joinpath(self, *other): + next = posixpath.join(self.at, *map(_pathlib_compat, other)) + return self._next(self.root.resolve_dir(next)) + + __truediv__ = joinpath + + @property + def parent(self): + if not self.at: + return self.filename.parent + parent_at = 
posixpath.dirname(self.at.rstrip('/')) + if parent_at: + parent_at += '/' + return self._next(parent_at) diff --git a/third_party/python/setuptools/pkg_resources/extern/__init__.py b/third_party/python/setuptools/pkg_resources/extern/__init__.py index 4dc3beb2fa..948bcc6094 100644 --- a/third_party/python/setuptools/pkg_resources/extern/__init__.py +++ b/third_party/python/setuptools/pkg_resources/extern/__init__.py @@ -1,3 +1,4 @@ +import importlib.util import sys @@ -20,17 +21,10 @@ class VendorImporter: yield self.vendor_pkg + '.' yield '' - def find_module(self, fullname, path=None): - """ - Return self when fullname starts with root_name and the - target module is one vendored through this importer. - """ + def _module_matches_namespace(self, fullname): + """Figure out if the target module is vendored.""" root, base, target = fullname.partition(self.root_name + '.') - if root: - return - if not any(map(target.startswith, self.vendored_names)): - return - return self + return not root and any(map(target.startswith, self.vendored_names)) def load_module(self, fullname): """ @@ -54,6 +48,20 @@ class VendorImporter: "distribution.".format(**locals()) ) + def create_module(self, spec): + return self.load_module(spec.name) + + def exec_module(self, module): + pass + + def find_spec(self, fullname, path=None, target=None): + """Return a module spec for vendored names.""" + return ( + importlib.util.spec_from_loader(fullname, self) + if self._module_matches_namespace(fullname) + else None + ) + def install(self): """ Install this importer into sys.meta_path if not already present. @@ -62,5 +70,11 @@ class VendorImporter: sys.meta_path.append(self) -names = 'packaging', 'pyparsing', 'appdirs' +names = ( + 'packaging', + 'platformdirs', + 'jaraco', + 'importlib_resources', + 'more_itertools', +) VendorImporter(__name__, names).install() |