diff options
Diffstat (limited to 'flit/validate.py')
-rw-r--r-- | flit/validate.py | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/flit/validate.py b/flit/validate.py new file mode 100644 index 0000000..7184b1f --- /dev/null +++ b/flit/validate.py @@ -0,0 +1,301 @@ +"""Validate various pieces of packaging data""" + +import errno +import io +import logging +import os +from pathlib import Path +import re +import requests +import sys + +from .vendorized.readme.rst import render + +log = logging.getLogger(__name__) + +CUSTOM_CLASSIFIERS = frozenset({ + # https://github.com/pypa/warehouse/pull/5440 + 'Private :: Do Not Upload', +}) + + +def get_cache_dir() -> Path: + """Locate a platform-appropriate cache directory for flit to use + + Does not ensure that the cache directory exists. + """ + # Linux, Unix, AIX, etc. + if os.name == 'posix' and sys.platform != 'darwin': + # use ~/.cache if empty OR not set + xdg = os.environ.get("XDG_CACHE_HOME", None) \ + or os.path.expanduser('~/.cache') + return Path(xdg, 'flit') + + # Mac OS + elif sys.platform == 'darwin': + return Path(os.path.expanduser('~'), 'Library/Caches/flit') + + # Windows (hopefully) + else: + local = os.environ.get('LOCALAPPDATA', None) \ + or os.path.expanduser('~\\AppData\\Local') + return Path(local, 'flit') + + +def _read_classifiers_cached(): + """Reads classifiers from cached file""" + with (get_cache_dir() / 'classifiers.lst').open(encoding='utf-8') as f: + valid_classifiers = set(l.strip() for l in f) + return valid_classifiers + + +def _download_and_cache_classifiers(): + """Get the list of valid trove classifiers from PyPI""" + log.info('Fetching list of valid trove classifiers') + resp = requests.get( + 'https://pypi.org/pypi?%3Aaction=list_classifiers') + resp.raise_for_status() + + cache_dir = get_cache_dir() + try: + cache_dir.mkdir(parents=True) + except (FileExistsError, PermissionError): + pass + except OSError as e: + # readonly mounted file raises OSError, only these should be captured + if e.errno != errno.EROFS: + raise + + try: + with (cache_dir / 'classifiers.lst').open('wb') as f: + f.write(resp.content) + except (PermissionError, FileNotFoundError): + # cache file could not be created + pass + except OSError as e: + # readonly mounted file raises OSError, only these should be captured + if e.errno != errno.EROFS: + raise + + valid_classifiers = set(l.strip() for l in resp.text.splitlines()) + return valid_classifiers + + +def _verify_classifiers(classifiers, valid_classifiers): + """Check classifiers against a set of known classifiers""" + invalid = classifiers - valid_classifiers + return ["Unrecognised classifier: {!r}".format(c) + for c in sorted(invalid)] + + +def validate_classifiers(classifiers): + """Verify trove classifiers from config file. + + Fetches and caches a list of known classifiers from PyPI. Setting the + environment variable FLIT_NO_NETWORK=1 will skip this if the classifiers + are not already cached. + """ + if not classifiers: + return [] + + problems = [] + classifiers = set(classifiers) + try: + valid_classifiers = _read_classifiers_cached() + valid_classifiers.update(CUSTOM_CLASSIFIERS) + problems = _verify_classifiers(classifiers, valid_classifiers) + except (FileNotFoundError, PermissionError) as e1: + # We haven't yet got the classifiers cached or couldn't read it + pass + else: + if not problems: + return [] + + # Either we don't have the list, or there were unexpected classifiers + # which might have been added since we last fetched it. Fetch and cache. + + if os.environ.get('FLIT_NO_NETWORK', ''): + log.warning( + "Not checking classifiers, because FLIT_NO_NETWORK is set") + return [] + + # Try to download up-to-date list of classifiers + try: + valid_classifiers = _download_and_cache_classifiers() + except requests.ConnectionError: + # The error you get on a train, going through Oregon, without wifi + log.warning( + "Couldn't get list of valid classifiers to check against") + return problems + valid_classifiers.update(CUSTOM_CLASSIFIERS) + return _verify_classifiers(classifiers, valid_classifiers) + + +def validate_entrypoints(entrypoints): + """Check that the loaded entrypoints are valid. + + Expects a dict of dicts, e.g.:: + + {'console_scripts': {'flit': 'flit:main'}} + """ + + def _is_identifier_attr(s): + return all(n.isidentifier() for n in s.split('.')) + + problems = [] + for groupname, group in entrypoints.items(): + for k, v in group.items(): + if ':' in v: + mod, obj = v.split(':', 1) + valid = _is_identifier_attr(mod) and _is_identifier_attr(obj) + else: + valid = _is_identifier_attr(v) + + if not valid: + problems.append('Invalid entry point in group {}: ' + '{} = {}'.format(groupname, k, v)) + return problems + +# Distribution name, not quite the same as a Python identifier +NAME = re.compile(r'^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$', re.IGNORECASE) +r'' +VERSION_SPEC = re.compile(r'(~=|===?|!=|<=?|>=?)\s*[A-Z0-9\-_.*+!]+$', re.IGNORECASE) +REQUIREMENT = re.compile(NAME.pattern[:-1] + # Trim '$' + r"""\s*(?P<extras>\[.*\])? + \s*(?P<version>[(=~<>!@][^;]*)? + \s*(?P<envmark>;.*)? + $""", re.IGNORECASE | re.VERBOSE) +MARKER_OP = re.compile(r'(~=|===?|!=|<=?|>=?|\s+in\s+|\s+not in\s+)') + +def validate_name(metadata): + name = metadata.get('name', None) + if name is None or NAME.match(name): + return [] + return ['Invalid name: {!r}'.format(name)] + + +def _valid_version_specifier(s): + for clause in s.split(','): + if not VERSION_SPEC.match(clause.strip()): + return False + return True + +def validate_requires_python(metadata): + spec = metadata.get('requires_python', None) + if spec is None or _valid_version_specifier(spec): + return [] + return ['Invalid requires-python: {!r}'.format(spec)] + +MARKER_VARS = { + 'python_version', 'python_full_version', 'os_name', 'sys_platform', + 'platform_release', 'platform_system', 'platform_version', 'platform_machine', + 'platform_python_implementation', 'implementation_name', + 'implementation_version', 'extra', +} + +def validate_environment_marker(em): + clauses = re.split(r'\s+(?:and|or)\s+', em) + problems = [] + for c in clauses: + # TODO: validate parentheses properly. They're allowed by PEP 508. + parts = MARKER_OP.split(c.strip('()')) + if len(parts) != 3: + problems.append("Invalid expression in environment marker: {!r}".format(c)) + continue + l, op, r = parts + for var in (l.strip(), r.strip()): + if var[:1] in {'"', "'"}: + if len(var) < 2 or var[-1:] != var[:1]: + problems.append("Invalid string in environment marker: {}".format(var)) + elif var not in MARKER_VARS: + problems.append("Invalid variable name in environment marker: {!r}".format(var)) + return problems + +def validate_requires_dist(metadata): + probs = [] + for req in metadata.get('requires_dist', []): + m = REQUIREMENT.match(req) + if not m: + probs.append("Could not parse requirement: {!r}".format(req)) + continue + + extras, version, envmark = m.group('extras', 'version', 'envmark') + if not (extras is None or all(NAME.match(e.strip()) + for e in extras[1:-1].split(','))): + probs.append("Invalid extras in requirement: {!r}".format(req)) + if version is not None: + if version.startswith('(') and version.endswith(')'): + version = version[1:-1] + if version.startswith('@'): + pass # url specifier TODO: validate URL + elif not _valid_version_specifier(version): + print((extras, version, envmark)) + probs.append("Invalid version specifier {!r} in requirement {!r}" + .format(version, req)) + if envmark is not None: + probs.extend(validate_environment_marker(envmark[1:])) + return probs + +def validate_url(url): + if url is None: + return [] + probs = [] + if not url.startswith(('http://', 'https://')): + probs.append("URL {!r} doesn't start with https:// or http://" + .format(url)) + elif not url.split('//', 1)[1]: + probs.append("URL missing address") + return probs + +def validate_project_urls(metadata): + probs = [] + for prurl in metadata.get('project_urls', []): + name, url = prurl.split(',', 1) + url = url.lstrip() + if not name: + probs.append("No name for project URL {!r}".format(url)) + elif len(name) > 32: + probs.append("Project URL name {!r} longer than 32 characters" + .format(name)) + probs.extend(validate_url(url)) + + return probs + + +def validate_readme_rst(metadata): + mimetype = metadata.get('description_content_type', '') + + if mimetype != 'text/x-rst': + return [] + + # rst check + raw_desc = metadata.get('description', '') + stream = io.StringIO() + res = render(raw_desc, stream) + if not res: + return [ + ("The file description seems not to be valid rst for PyPI;" + " it will be interpreted as plain text"), + stream.getvalue(), + ] + + return [] # rst rendered OK + + +def validate_config(config_info): + i = config_info + problems = sum([ + validate_classifiers(i.metadata.get('classifiers')), + validate_entrypoints(i.entrypoints), + validate_name(i.metadata), + validate_requires_python(i.metadata), + validate_requires_dist(i.metadata), + validate_url(i.metadata.get('home_page', None)), + validate_project_urls(i.metadata), + validate_readme_rst(i.metadata) + ], []) + + for p in problems: + log.error(p) + return problems + |