summaryrefslogtreecommitdiffstats
path: root/third_party/python/pip_tools/piptools/repositories/pypi.py
blob: 7a988bfc1f64f132245fd40791723ccb77953014 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
# coding: utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

import collections
import hashlib
import itertools
import logging
import os
from contextlib import contextmanager
from shutil import rmtree

from pip._internal.cache import WheelCache
from pip._internal.cli.progress_bars import BAR_TYPES
from pip._internal.commands import create_command
from pip._internal.models.index import PackageIndex, PyPI
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.req import RequirementSet
from pip._internal.req.req_tracker import get_requirement_tracker
from pip._internal.utils.hashes import FAVORITE_HASH
from pip._internal.utils.logging import indent_log, setup_logging
from pip._internal.utils.misc import normalize_path
from pip._internal.utils.temp_dir import TempDirectory, global_tempdir_manager
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._vendor.requests import RequestException

from .._compat import PIP_VERSION, TemporaryDirectory, contextlib, makedirs
from ..click import progressbar
from ..exceptions import NoCandidateFound
from ..logging import log
from ..utils import (
    as_tuple,
    fs_str,
    is_pinned_requirement,
    is_url_requirement,
    lookup_table,
    make_install_requirement,
)
from .base import BaseRepository

FILE_CHUNK_SIZE = 4096
FileStream = collections.namedtuple("FileStream", "stream size")


class PyPIRepository(BaseRepository):
    DEFAULT_INDEX_URL = PyPI.simple_url
    HASHABLE_PACKAGE_TYPES = {"bdist_wheel", "sdist"}

    """
    The PyPIRepository will use the provided Finder instance to lookup
    packages.  Typically, it looks up packages on PyPI (the default implicit
    config), but any other PyPI mirror can be used if index_urls is
    changed/configured on the Finder.
    """

    def __init__(self, pip_args, cache_dir):
        # Use pip's parser for pip.conf management and defaults.
        # General options (find_links, index_url, extra_index_url, trusted_host,
        # and pre) are deferred to pip.
        self.command = create_command("install")
        extra_pip_args = (
            []
            if PIP_VERSION[:2] <= (20, 2)
            else ["--use-deprecated", "legacy-resolver"]
        )
        self.options, _ = self.command.parse_args(pip_args + extra_pip_args)
        if self.options.cache_dir:
            self.options.cache_dir = normalize_path(self.options.cache_dir)

        self.options.require_hashes = False
        self.options.ignore_dependencies = False

        self.session = self.command._build_session(self.options)
        self.finder = self.command._build_package_finder(
            options=self.options, session=self.session
        )

        # Caches
        # stores project_name => InstallationCandidate mappings for all
        # versions reported by PyPI, so we only have to ask once for each
        # project
        self._available_candidates_cache = {}

        # stores InstallRequirement => list(InstallRequirement) mappings
        # of all secondary dependencies for the given requirement, so we
        # only have to go to disk once for each requirement
        self._dependencies_cache = {}

        # Setup file paths
        self._build_dir = None
        self._source_dir = None
        self._cache_dir = normalize_path(cache_dir)
        self._download_dir = fs_str(os.path.join(self._cache_dir, "pkgs"))
        if PIP_VERSION[:2] <= (20, 2):
            self._wheel_download_dir = fs_str(os.path.join(self._cache_dir, "wheels"))

        self._setup_logging()

    @contextmanager
    def freshen_build_caches(self):
        """
        Start with fresh build/source caches.  Will remove any old build
        caches from disk automatically.
        """
        self._build_dir = TemporaryDirectory(fs_str("build"))
        self._source_dir = TemporaryDirectory(fs_str("source"))
        try:
            yield
        finally:
            self._build_dir.cleanup()
            self._build_dir = None
            self._source_dir.cleanup()
            self._source_dir = None

    @property
    def build_dir(self):
        return self._build_dir.name if self._build_dir else None

    @property
    def source_dir(self):
        return self._source_dir.name if self._source_dir else None

    def clear_caches(self):
        rmtree(self._download_dir, ignore_errors=True)
        if PIP_VERSION[:2] <= (20, 2):
            rmtree(self._wheel_download_dir, ignore_errors=True)

    def find_all_candidates(self, req_name):
        if req_name not in self._available_candidates_cache:
            candidates = self.finder.find_all_candidates(req_name)
            self._available_candidates_cache[req_name] = candidates
        return self._available_candidates_cache[req_name]

    def find_best_match(self, ireq, prereleases=None):
        """
        Returns a Version object that indicates the best match for the given
        InstallRequirement according to the external repository.
        """
        if ireq.editable or is_url_requirement(ireq):
            return ireq  # return itself as the best match

        all_candidates = self.find_all_candidates(ireq.name)
        candidates_by_version = lookup_table(all_candidates, key=lambda c: c.version)
        matching_versions = ireq.specifier.filter(
            (candidate.version for candidate in all_candidates), prereleases=prereleases
        )

        matching_candidates = list(
            itertools.chain.from_iterable(
                candidates_by_version[ver] for ver in matching_versions
            )
        )
        if not matching_candidates:
            raise NoCandidateFound(ireq, all_candidates, self.finder)

        evaluator = self.finder.make_candidate_evaluator(ireq.name)
        best_candidate_result = evaluator.compute_best_candidate(matching_candidates)
        best_candidate = best_candidate_result.best_candidate

        # Turn the candidate into a pinned InstallRequirement
        return make_install_requirement(
            best_candidate.name,
            best_candidate.version,
            ireq.extras,
            constraint=ireq.constraint,
        )

    def resolve_reqs(self, download_dir, ireq, wheel_cache):
        with get_requirement_tracker() as req_tracker, TempDirectory(
            kind="resolver"
        ) as temp_dir, indent_log():
            preparer_kwargs = dict(
                temp_build_dir=temp_dir,
                options=self.options,
                req_tracker=req_tracker,
                session=self.session,
                finder=self.finder,
                use_user_site=False,
                download_dir=download_dir,
            )
            if PIP_VERSION[:2] <= (20, 2):
                preparer_kwargs["wheel_download_dir"] = self._wheel_download_dir
            preparer = self.command.make_requirement_preparer(**preparer_kwargs)

            reqset = RequirementSet()
            if PIP_VERSION[:2] <= (20, 1):
                ireq.is_direct = True
            else:
                ireq.user_supplied = True
            reqset.add_requirement(ireq)

            resolver = self.command.make_resolver(
                preparer=preparer,
                finder=self.finder,
                options=self.options,
                wheel_cache=wheel_cache,
                use_user_site=False,
                ignore_installed=True,
                ignore_requires_python=False,
                force_reinstall=False,
                upgrade_strategy="to-satisfy-only",
            )
            results = resolver._resolve_one(reqset, ireq)
            if not ireq.prepared:
                # If still not prepared, e.g. a constraint, do enough to assign
                # the ireq a name:
                if PIP_VERSION[:2] <= (20, 2):
                    resolver._get_abstract_dist_for(ireq)
                else:
                    resolver._get_dist_for(ireq)

        return set(results)

    def get_dependencies(self, ireq):
        """
        Given a pinned, URL, or editable InstallRequirement, returns a set of
        dependencies (also InstallRequirements, but not necessarily pinned).
        They indicate the secondary dependencies for the given requirement.
        """
        if not (
            ireq.editable or is_url_requirement(ireq) or is_pinned_requirement(ireq)
        ):
            raise TypeError(
                "Expected url, pinned or editable InstallRequirement, got {}".format(
                    ireq
                )
            )

        if ireq not in self._dependencies_cache:
            if ireq.editable and (ireq.source_dir and os.path.exists(ireq.source_dir)):
                # No download_dir for locally available editable requirements.
                # If a download_dir is passed, pip will unnecessarily archive
                # the entire source directory
                download_dir = None
            elif ireq.link and ireq.link.is_vcs:
                # No download_dir for VCS sources.  This also works around pip
                # using git-checkout-index, which gets rid of the .git dir.
                download_dir = None
            else:
                download_dir = self._get_download_path(ireq)
                makedirs(download_dir, exist_ok=True)
            if PIP_VERSION[:2] <= (20, 2):
                makedirs(self._wheel_download_dir, exist_ok=True)

            with global_tempdir_manager():
                wheel_cache = WheelCache(self._cache_dir, self.options.format_control)
                self._dependencies_cache[ireq] = self.resolve_reqs(
                    download_dir, ireq, wheel_cache
                )

        return self._dependencies_cache[ireq]

    def copy_ireq_dependencies(self, source, dest):
        try:
            self._dependencies_cache[dest] = self._dependencies_cache[source]
        except KeyError:
            # `source` may not be in cache yet.
            pass

    def _get_project(self, ireq):
        """
        Return a dict of a project info from PyPI JSON API for a given
        InstallRequirement. Return None on HTTP/JSON error or if a package
        is not found on PyPI server.

        API reference: https://warehouse.readthedocs.io/api-reference/json/
        """
        package_indexes = (
            PackageIndex(url=index_url, file_storage_domain="")
            for index_url in self.finder.search_scope.index_urls
        )
        for package_index in package_indexes:
            url = "{url}/{name}/json".format(url=package_index.pypi_url, name=ireq.name)
            try:
                response = self.session.get(url)
            except RequestException as e:
                log.debug(
                    "Fetch package info from PyPI failed: {url}: {e}".format(
                        url=url, e=e
                    )
                )
                continue

            # Skip this PyPI server, because there is no package
            # or JSON API might be not supported
            if response.status_code == 404:
                continue

            try:
                data = response.json()
            except ValueError as e:
                log.debug(
                    "Cannot parse JSON response from PyPI: {url}: {e}".format(
                        url=url, e=e
                    )
                )
                continue
            return data
        return None

    def _get_download_path(self, ireq):
        """
        Determine the download dir location in a way which avoids name
        collisions.
        """
        if ireq.link:
            salt = hashlib.sha224(ireq.link.url_without_fragment.encode()).hexdigest()
            # Nest directories to avoid running out of top level dirs on some FS
            # (see pypi _get_cache_path_parts, which inspired this)
            salt = [salt[:2], salt[2:4], salt[4:6], salt[6:]]
            return os.path.join(self._download_dir, *salt)
        else:
            return self._download_dir

    def get_hashes(self, ireq):
        """
        Given an InstallRequirement, return a set of hashes that represent all
        of the files for a given requirement. Unhashable requirements return an
        empty set. Unpinned requirements raise a TypeError.
        """

        if ireq.link:
            link = ireq.link

            if link.is_vcs or (link.is_file and link.is_existing_dir()):
                # Return empty set for unhashable requirements.
                # Unhashable logic modeled on pip's
                # RequirementPreparer.prepare_linked_requirement
                return set()

            if is_url_requirement(ireq):
                # Directly hash URL requirements.
                # URL requirements may have been previously downloaded and cached
                # locally by self.resolve_reqs()
                cached_path = os.path.join(self._get_download_path(ireq), link.filename)
                if os.path.exists(cached_path):
                    cached_link = Link(path_to_url(cached_path))
                else:
                    cached_link = link
                return {self._get_file_hash(cached_link)}

        if not is_pinned_requirement(ireq):
            raise TypeError("Expected pinned requirement, got {}".format(ireq))

        log.debug(ireq.name)

        with log.indentation():
            hashes = self._get_hashes_from_pypi(ireq)
            if hashes is None:
                log.log("Couldn't get hashes from PyPI, fallback to hashing files")
                return self._get_hashes_from_files(ireq)

        return hashes

    def _get_hashes_from_pypi(self, ireq):
        """
        Return a set of hashes from PyPI JSON API for a given InstallRequirement.
        Return None if fetching data is failed or missing digests.
        """
        project = self._get_project(ireq)
        if project is None:
            return None

        _, version, _ = as_tuple(ireq)

        try:
            release_files = project["releases"][version]
        except KeyError:
            log.debug("Missing release files on PyPI")
            return None

        try:
            hashes = {
                "{algo}:{digest}".format(
                    algo=FAVORITE_HASH, digest=file_["digests"][FAVORITE_HASH]
                )
                for file_ in release_files
                if file_["packagetype"] in self.HASHABLE_PACKAGE_TYPES
            }
        except KeyError:
            log.debug("Missing digests of release files on PyPI")
            return None

        return hashes

    def _get_hashes_from_files(self, ireq):
        """
        Return a set of hashes for all release files of a given InstallRequirement.
        """
        # We need to get all of the candidates that match our current version
        # pin, these will represent all of the files that could possibly
        # satisfy this constraint.
        all_candidates = self.find_all_candidates(ireq.name)
        candidates_by_version = lookup_table(all_candidates, key=lambda c: c.version)
        matching_versions = list(
            ireq.specifier.filter((candidate.version for candidate in all_candidates))
        )
        matching_candidates = candidates_by_version[matching_versions[0]]

        return {
            self._get_file_hash(candidate.link) for candidate in matching_candidates
        }

    def _get_file_hash(self, link):
        log.debug("Hashing {}".format(link.show_url))
        h = hashlib.new(FAVORITE_HASH)
        with open_local_or_remote_file(link, self.session) as f:
            # Chunks to iterate
            chunks = iter(lambda: f.stream.read(FILE_CHUNK_SIZE), b"")

            # Choose a context manager depending on verbosity
            if log.verbosity >= 1:
                iter_length = f.size / FILE_CHUNK_SIZE if f.size else None
                bar_template = "{prefix}  |%(bar)s| %(info)s".format(
                    prefix=" " * log.current_indent
                )
                context_manager = progressbar(
                    chunks,
                    length=iter_length,
                    # Make it look like default pip progress bar
                    fill_char="█",
                    empty_char=" ",
                    bar_template=bar_template,
                    width=32,
                )
            else:
                context_manager = contextlib.nullcontext(chunks)

            # Iterate over the chosen context manager
            with context_manager as bar:
                for chunk in bar:
                    h.update(chunk)
        return ":".join([FAVORITE_HASH, h.hexdigest()])

    @contextmanager
    def allow_all_wheels(self):
        """
        Monkey patches pip.Wheel to allow wheels from all platforms and Python versions.

        This also saves the candidate cache and set a new one, or else the results from
        the previous non-patched calls will interfere.
        """

        def _wheel_supported(self, tags=None):
            # Ignore current platform. Support everything.
            return True

        def _wheel_support_index_min(self, tags=None):
            # All wheels are equal priority for sorting.
            return 0

        original_wheel_supported = Wheel.supported
        original_support_index_min = Wheel.support_index_min
        original_cache = self._available_candidates_cache

        Wheel.supported = _wheel_supported
        Wheel.support_index_min = _wheel_support_index_min
        self._available_candidates_cache = {}

        try:
            yield
        finally:
            Wheel.supported = original_wheel_supported
            Wheel.support_index_min = original_support_index_min
            self._available_candidates_cache = original_cache

    def _setup_logging(self):
        """
        Setup pip's logger. Ensure pip is verbose same as pip-tools and sync
        pip's log stream with LogContext.stream.
        """
        # Default pip's logger is noisy, so decrease it's verbosity
        setup_logging(
            verbosity=log.verbosity - 1,
            no_color=self.options.no_color,
            user_log_file=self.options.log,
        )

        # Sync pip's console handler stream with LogContext.stream
        logger = logging.getLogger()
        for handler in logger.handlers:
            if handler.name == "console":  # pragma: no branch
                handler.stream = log.stream
                break
        else:  # pragma: no cover
            # There is always a console handler. This warning would be a signal that
            # this block should be removed/revisited, because of pip possibly
            # refactored-out logging config.
            log.warning("Couldn't find a 'console' logging handler")

        # Sync pip's progress bars stream with LogContext.stream
        for bar_cls in itertools.chain(*BAR_TYPES.values()):
            bar_cls.file = log.stream


@contextmanager
def open_local_or_remote_file(link, session):
    """
    Open local or remote file for reading.

    :type link: pip.index.Link
    :type session: requests.Session
    :raises ValueError: If link points to a local directory.
    :return: a context manager to a FileStream with the opened file-like object
    """
    url = link.url_without_fragment

    if link.is_file:
        # Local URL
        local_path = url_to_path(url)
        if os.path.isdir(local_path):
            raise ValueError("Cannot open directory for read: {}".format(url))
        else:
            st = os.stat(local_path)
            with open(local_path, "rb") as local_file:
                yield FileStream(stream=local_file, size=st.st_size)
    else:
        # Remote URL
        headers = {"Accept-Encoding": "identity"}
        response = session.get(url, headers=headers, stream=True)

        # Content length must be int or None
        try:
            content_length = int(response.headers["content-length"])
        except (ValueError, KeyError, TypeError):
            content_length = None

        try:
            yield FileStream(stream=response.raw, size=content_length)
        finally:
            response.close()