# coding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import collections import hashlib import itertools import logging import os from contextlib import contextmanager from shutil import rmtree from pip._internal.cache import WheelCache from pip._internal.cli.progress_bars import BAR_TYPES from pip._internal.commands import create_command from pip._internal.models.index import PackageIndex, PyPI from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel from pip._internal.req import RequirementSet from pip._internal.req.req_tracker import get_requirement_tracker from pip._internal.utils.hashes import FAVORITE_HASH from pip._internal.utils.logging import indent_log, setup_logging from pip._internal.utils.misc import normalize_path from pip._internal.utils.temp_dir import TempDirectory, global_tempdir_manager from pip._internal.utils.urls import path_to_url, url_to_path from pip._vendor.requests import RequestException from .._compat import PIP_VERSION, TemporaryDirectory, contextlib, makedirs from ..click import progressbar from ..exceptions import NoCandidateFound from ..logging import log from ..utils import ( as_tuple, fs_str, is_pinned_requirement, is_url_requirement, lookup_table, make_install_requirement, ) from .base import BaseRepository FILE_CHUNK_SIZE = 4096 FileStream = collections.namedtuple("FileStream", "stream size") class PyPIRepository(BaseRepository): DEFAULT_INDEX_URL = PyPI.simple_url HASHABLE_PACKAGE_TYPES = {"bdist_wheel", "sdist"} """ The PyPIRepository will use the provided Finder instance to lookup packages. Typically, it looks up packages on PyPI (the default implicit config), but any other PyPI mirror can be used if index_urls is changed/configured on the Finder. """ def __init__(self, pip_args, cache_dir): # Use pip's parser for pip.conf management and defaults. # General options (find_links, index_url, extra_index_url, trusted_host, # and pre) are deferred to pip. self.command = create_command("install") extra_pip_args = ( [] if PIP_VERSION[:2] <= (20, 2) else ["--use-deprecated", "legacy-resolver"] ) self.options, _ = self.command.parse_args(pip_args + extra_pip_args) if self.options.cache_dir: self.options.cache_dir = normalize_path(self.options.cache_dir) self.options.require_hashes = False self.options.ignore_dependencies = False self.session = self.command._build_session(self.options) self.finder = self.command._build_package_finder( options=self.options, session=self.session ) # Caches # stores project_name => InstallationCandidate mappings for all # versions reported by PyPI, so we only have to ask once for each # project self._available_candidates_cache = {} # stores InstallRequirement => list(InstallRequirement) mappings # of all secondary dependencies for the given requirement, so we # only have to go to disk once for each requirement self._dependencies_cache = {} # Setup file paths self._build_dir = None self._source_dir = None self._cache_dir = normalize_path(cache_dir) self._download_dir = fs_str(os.path.join(self._cache_dir, "pkgs")) if PIP_VERSION[:2] <= (20, 2): self._wheel_download_dir = fs_str(os.path.join(self._cache_dir, "wheels")) self._setup_logging() @contextmanager def freshen_build_caches(self): """ Start with fresh build/source caches. Will remove any old build caches from disk automatically. """ self._build_dir = TemporaryDirectory(fs_str("build")) self._source_dir = TemporaryDirectory(fs_str("source")) try: yield finally: self._build_dir.cleanup() self._build_dir = None self._source_dir.cleanup() self._source_dir = None @property def build_dir(self): return self._build_dir.name if self._build_dir else None @property def source_dir(self): return self._source_dir.name if self._source_dir else None def clear_caches(self): rmtree(self._download_dir, ignore_errors=True) if PIP_VERSION[:2] <= (20, 2): rmtree(self._wheel_download_dir, ignore_errors=True) def find_all_candidates(self, req_name): if req_name not in self._available_candidates_cache: candidates = self.finder.find_all_candidates(req_name) self._available_candidates_cache[req_name] = candidates return self._available_candidates_cache[req_name] def find_best_match(self, ireq, prereleases=None): """ Returns a Version object that indicates the best match for the given InstallRequirement according to the external repository. """ if ireq.editable or is_url_requirement(ireq): return ireq # return itself as the best match all_candidates = self.find_all_candidates(ireq.name) candidates_by_version = lookup_table(all_candidates, key=lambda c: c.version) matching_versions = ireq.specifier.filter( (candidate.version for candidate in all_candidates), prereleases=prereleases ) matching_candidates = list( itertools.chain.from_iterable( candidates_by_version[ver] for ver in matching_versions ) ) if not matching_candidates: raise NoCandidateFound(ireq, all_candidates, self.finder) evaluator = self.finder.make_candidate_evaluator(ireq.name) best_candidate_result = evaluator.compute_best_candidate(matching_candidates) best_candidate = best_candidate_result.best_candidate # Turn the candidate into a pinned InstallRequirement return make_install_requirement( best_candidate.name, best_candidate.version, ireq.extras, constraint=ireq.constraint, ) def resolve_reqs(self, download_dir, ireq, wheel_cache): with get_requirement_tracker() as req_tracker, TempDirectory( kind="resolver" ) as temp_dir, indent_log(): preparer_kwargs = dict( temp_build_dir=temp_dir, options=self.options, req_tracker=req_tracker, session=self.session, finder=self.finder, use_user_site=False, download_dir=download_dir, ) if PIP_VERSION[:2] <= (20, 2): preparer_kwargs["wheel_download_dir"] = self._wheel_download_dir preparer = self.command.make_requirement_preparer(**preparer_kwargs) reqset = RequirementSet() if PIP_VERSION[:2] <= (20, 1): ireq.is_direct = True else: ireq.user_supplied = True reqset.add_requirement(ireq) resolver = self.command.make_resolver( preparer=preparer, finder=self.finder, options=self.options, wheel_cache=wheel_cache, use_user_site=False, ignore_installed=True, ignore_requires_python=False, force_reinstall=False, upgrade_strategy="to-satisfy-only", ) results = resolver._resolve_one(reqset, ireq) if not ireq.prepared: # If still not prepared, e.g. a constraint, do enough to assign # the ireq a name: if PIP_VERSION[:2] <= (20, 2): resolver._get_abstract_dist_for(ireq) else: resolver._get_dist_for(ireq) return set(results) def get_dependencies(self, ireq): """ Given a pinned, URL, or editable InstallRequirement, returns a set of dependencies (also InstallRequirements, but not necessarily pinned). They indicate the secondary dependencies for the given requirement. """ if not ( ireq.editable or is_url_requirement(ireq) or is_pinned_requirement(ireq) ): raise TypeError( "Expected url, pinned or editable InstallRequirement, got {}".format( ireq ) ) if ireq not in self._dependencies_cache: if ireq.editable and (ireq.source_dir and os.path.exists(ireq.source_dir)): # No download_dir for locally available editable requirements. # If a download_dir is passed, pip will unnecessarily archive # the entire source directory download_dir = None elif ireq.link and ireq.link.is_vcs: # No download_dir for VCS sources. This also works around pip # using git-checkout-index, which gets rid of the .git dir. download_dir = None else: download_dir = self._get_download_path(ireq) makedirs(download_dir, exist_ok=True) if PIP_VERSION[:2] <= (20, 2): makedirs(self._wheel_download_dir, exist_ok=True) with global_tempdir_manager(): wheel_cache = WheelCache(self._cache_dir, self.options.format_control) self._dependencies_cache[ireq] = self.resolve_reqs( download_dir, ireq, wheel_cache ) return self._dependencies_cache[ireq] def copy_ireq_dependencies(self, source, dest): try: self._dependencies_cache[dest] = self._dependencies_cache[source] except KeyError: # `source` may not be in cache yet. pass def _get_project(self, ireq): """ Return a dict of a project info from PyPI JSON API for a given InstallRequirement. Return None on HTTP/JSON error or if a package is not found on PyPI server. API reference: https://warehouse.readthedocs.io/api-reference/json/ """ package_indexes = ( PackageIndex(url=index_url, file_storage_domain="") for index_url in self.finder.search_scope.index_urls ) for package_index in package_indexes: url = "{url}/{name}/json".format(url=package_index.pypi_url, name=ireq.name) try: response = self.session.get(url) except RequestException as e: log.debug( "Fetch package info from PyPI failed: {url}: {e}".format( url=url, e=e ) ) continue # Skip this PyPI server, because there is no package # or JSON API might be not supported if response.status_code == 404: continue try: data = response.json() except ValueError as e: log.debug( "Cannot parse JSON response from PyPI: {url}: {e}".format( url=url, e=e ) ) continue return data return None def _get_download_path(self, ireq): """ Determine the download dir location in a way which avoids name collisions. """ if ireq.link: salt = hashlib.sha224(ireq.link.url_without_fragment.encode()).hexdigest() # Nest directories to avoid running out of top level dirs on some FS # (see pypi _get_cache_path_parts, which inspired this) salt = [salt[:2], salt[2:4], salt[4:6], salt[6:]] return os.path.join(self._download_dir, *salt) else: return self._download_dir def get_hashes(self, ireq): """ Given an InstallRequirement, return a set of hashes that represent all of the files for a given requirement. Unhashable requirements return an empty set. Unpinned requirements raise a TypeError. """ if ireq.link: link = ireq.link if link.is_vcs or (link.is_file and link.is_existing_dir()): # Return empty set for unhashable requirements. # Unhashable logic modeled on pip's # RequirementPreparer.prepare_linked_requirement return set() if is_url_requirement(ireq): # Directly hash URL requirements. # URL requirements may have been previously downloaded and cached # locally by self.resolve_reqs() cached_path = os.path.join(self._get_download_path(ireq), link.filename) if os.path.exists(cached_path): cached_link = Link(path_to_url(cached_path)) else: cached_link = link return {self._get_file_hash(cached_link)} if not is_pinned_requirement(ireq): raise TypeError("Expected pinned requirement, got {}".format(ireq)) log.debug(ireq.name) with log.indentation(): hashes = self._get_hashes_from_pypi(ireq) if hashes is None: log.log("Couldn't get hashes from PyPI, fallback to hashing files") return self._get_hashes_from_files(ireq) return hashes def _get_hashes_from_pypi(self, ireq): """ Return a set of hashes from PyPI JSON API for a given InstallRequirement. Return None if fetching data is failed or missing digests. """ project = self._get_project(ireq) if project is None: return None _, version, _ = as_tuple(ireq) try: release_files = project["releases"][version] except KeyError: log.debug("Missing release files on PyPI") return None try: hashes = { "{algo}:{digest}".format( algo=FAVORITE_HASH, digest=file_["digests"][FAVORITE_HASH] ) for file_ in release_files if file_["packagetype"] in self.HASHABLE_PACKAGE_TYPES } except KeyError: log.debug("Missing digests of release files on PyPI") return None return hashes def _get_hashes_from_files(self, ireq): """ Return a set of hashes for all release files of a given InstallRequirement. """ # We need to get all of the candidates that match our current version # pin, these will represent all of the files that could possibly # satisfy this constraint. all_candidates = self.find_all_candidates(ireq.name) candidates_by_version = lookup_table(all_candidates, key=lambda c: c.version) matching_versions = list( ireq.specifier.filter((candidate.version for candidate in all_candidates)) ) matching_candidates = candidates_by_version[matching_versions[0]] return { self._get_file_hash(candidate.link) for candidate in matching_candidates } def _get_file_hash(self, link): log.debug("Hashing {}".format(link.show_url)) h = hashlib.new(FAVORITE_HASH) with open_local_or_remote_file(link, self.session) as f: # Chunks to iterate chunks = iter(lambda: f.stream.read(FILE_CHUNK_SIZE), b"") # Choose a context manager depending on verbosity if log.verbosity >= 1: iter_length = f.size / FILE_CHUNK_SIZE if f.size else None bar_template = "{prefix} |%(bar)s| %(info)s".format( prefix=" " * log.current_indent ) context_manager = progressbar( chunks, length=iter_length, # Make it look like default pip progress bar fill_char="█", empty_char=" ", bar_template=bar_template, width=32, ) else: context_manager = contextlib.nullcontext(chunks) # Iterate over the chosen context manager with context_manager as bar: for chunk in bar: h.update(chunk) return ":".join([FAVORITE_HASH, h.hexdigest()]) @contextmanager def allow_all_wheels(self): """ Monkey patches pip.Wheel to allow wheels from all platforms and Python versions. This also saves the candidate cache and set a new one, or else the results from the previous non-patched calls will interfere. """ def _wheel_supported(self, tags=None): # Ignore current platform. Support everything. return True def _wheel_support_index_min(self, tags=None): # All wheels are equal priority for sorting. return 0 original_wheel_supported = Wheel.supported original_support_index_min = Wheel.support_index_min original_cache = self._available_candidates_cache Wheel.supported = _wheel_supported Wheel.support_index_min = _wheel_support_index_min self._available_candidates_cache = {} try: yield finally: Wheel.supported = original_wheel_supported Wheel.support_index_min = original_support_index_min self._available_candidates_cache = original_cache def _setup_logging(self): """ Setup pip's logger. Ensure pip is verbose same as pip-tools and sync pip's log stream with LogContext.stream. """ # Default pip's logger is noisy, so decrease it's verbosity setup_logging( verbosity=log.verbosity - 1, no_color=self.options.no_color, user_log_file=self.options.log, ) # Sync pip's console handler stream with LogContext.stream logger = logging.getLogger() for handler in logger.handlers: if handler.name == "console": # pragma: no branch handler.stream = log.stream break else: # pragma: no cover # There is always a console handler. This warning would be a signal that # this block should be removed/revisited, because of pip possibly # refactored-out logging config. log.warning("Couldn't find a 'console' logging handler") # Sync pip's progress bars stream with LogContext.stream for bar_cls in itertools.chain(*BAR_TYPES.values()): bar_cls.file = log.stream @contextmanager def open_local_or_remote_file(link, session): """ Open local or remote file for reading. :type link: pip.index.Link :type session: requests.Session :raises ValueError: If link points to a local directory. :return: a context manager to a FileStream with the opened file-like object """ url = link.url_without_fragment if link.is_file: # Local URL local_path = url_to_path(url) if os.path.isdir(local_path): raise ValueError("Cannot open directory for read: {}".format(url)) else: st = os.stat(local_path) with open(local_path, "rb") as local_file: yield FileStream(stream=local_file, size=st.st_size) else: # Remote URL headers = {"Accept-Encoding": "identity"} response = session.get(url, headers=headers, stream=True) # Content length must be int or None try: content_length = int(response.headers["content-length"]) except (ValueError, KeyError, TypeError): content_length = None try: yield FileStream(stream=response.raw, size=content_length) finally: response.close()