summaryrefslogtreecommitdiffstats
path: root/third_party/python/pip_tools/piptools/resolver.py
blob: d46a04a9e3868d80d62d0ff446b24f39ac3d91be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# coding: utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

import copy
from functools import partial
from itertools import chain, count, groupby

from pip._internal.req.constructors import install_req_from_line
from pip._internal.req.req_tracker import update_env_context_manager

from . import click
from .logging import log
from .utils import (
    UNSAFE_PACKAGES,
    format_requirement,
    format_specifier,
    is_pinned_requirement,
    is_url_requirement,
    key_from_ireq,
)

# Shorthand partials of ``click.style`` with a fixed foreground color.
# ``magenta`` is used for the "ROUND n" banner logged by ``Resolver.resolve``.
green = partial(click.style, fg="green")
magenta = partial(click.style, fg="magenta")


class RequirementSummary(object):
    """
    Summary of a requirement's properties for comparison purposes.

    Two summaries compare equal when their key, specifier and extras all
    match.  The hash is built from the same three values, so summaries can
    be stored in sets and used as dict keys.
    """

    def __init__(self, ireq):
        # The original ``req`` is kept next to the derived fields so a
        # summary still exposes it to helpers that expect an ireq-like object.
        self.req = ireq.req
        self.key = key_from_ireq(ireq)
        # frozenset: extras must be hashable and order-insensitive.
        self.extras = frozenset(ireq.extras)
        self.specifier = ireq.specifier

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on a missing ``.key``.
        if not isinstance(other, RequirementSummary):
            return NotImplemented
        return (
            self.key == other.key
            and self.specifier == other.specifier
            and self.extras == other.extras
        )

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; define it
        # explicitly so both operators always agree.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash((self.key, self.specifier, self.extras))

    def __str__(self):
        return repr((self.key, str(self.specifier), sorted(self.extras)))


def combine_install_requirements(repository, ireqs):
    """
    Return a single install requirement that reflects a combination of
    all the inputs.

    ``repository`` must provide ``copy_ireq_dependencies(source, target)``.
    ``ireqs`` is a non-empty iterable of install requirements; an empty
    iterable raises ``IndexError``.

    When only one source requirement is involved it is returned as-is.
    Otherwise a deep copy of the first source is returned with the
    specifiers, constraint flags and extras of all sources merged in; the
    inputs themselves are not modified.  The full list of sources is
    attached to the result as ``_source_ireqs``.
    """
    # We will store the source ireqs in a _source_ireqs attribute;
    # if any of the inputs have this, then use those sources directly.
    source_ireqs = []
    for ireq in ireqs:
        source_ireqs.extend(getattr(ireq, "_source_ireqs", [ireq]))

    # Optimization. Don't bother with combination logic.
    if len(source_ireqs) == 1:
        return source_ireqs[0]

    # deepcopy the accumulator so as to not modify the inputs
    first_ireq = source_ireqs[0]
    combined_ireq = copy.deepcopy(first_ireq)
    repository.copy_ireq_dependencies(first_ireq, combined_ireq)

    for ireq in source_ireqs[1:]:
        # NOTE we may be losing some info on dropped reqs here
        # A requirement without a parsed ``req`` has no specifier to merge,
        # so skip the merge rather than fail on ``None.specifier``.
        if combined_ireq.req is not None and ireq.req is not None:
            combined_ireq.req.specifier &= ireq.req.specifier
        if combined_ireq.constraint:
            # We don't find dependencies for constraint ireqs, so copy them
            # from non-constraints:
            repository.copy_ireq_dependencies(ireq, combined_ireq)
        # The result stays a constraint only if every source is one.
        combined_ireq.constraint &= ireq.constraint
        # Return a sorted, de-duped tuple of extras
        combined_ireq.extras = tuple(
            sorted(set(tuple(combined_ireq.extras) + tuple(ireq.extras)))
        )

    # InstallRequirements objects are assumed to come from only one source, and
    # so they support only a single comes_from entry. This function breaks this
    # model. As a workaround, we deterministically choose a single source for
    # the comes_from entry, and add an extra _source_ireqs attribute to keep
    # track of multiple sources for use within pip-tools.
    # (At this point there is always more than one source; the single-source
    # case returned early above.)
    if any(ireq.comes_from is None for ireq in source_ireqs):
        # None indicates package was directly specified.
        combined_ireq.comes_from = None
    else:
        # Populate the comes_from field from one of the sources.
        # Requirement input order is not stable, so we need to sort:
        # We choose the shortest entry in order to keep the printed
        # representation as concise as possible.
        combined_ireq.comes_from = min(
            (ireq.comes_from for ireq in source_ireqs),
            key=lambda x: (len(str(x)), str(x)),
        )
    combined_ireq._source_ireqs = source_ireqs
    return combined_ireq


class Resolver(object):
    def __init__(
        self,
        constraints,
        repository,
        cache,
        prereleases=False,
        clear_caches=False,
        allow_unsafe=False,
    ):
        """
        Set up a resolver for the given constraints (a collection of
        InstallRequirement objects), backed by the given Repository and
        DependencyCache.

        ``prereleases`` is forwarded to the repository when looking up best
        matches, ``clear_caches`` makes ``resolve()`` clear the dependency
        cache and the repository caches before it starts, and
        ``allow_unsafe`` disables the unsafe-package filtering that
        ``resolve()`` otherwise applies.
        """
        # Collaborators consulted during resolution.
        self.repository = repository
        self.dependency_cache = cache

        # Behavior switches.
        self.prereleases = prereleases
        self.clear_caches = clear_caches
        self.allow_unsafe = allow_unsafe

        # Resolution state: what the caller asked for, what the resolved
        # packages asked for, and what was filtered out as unsafe.
        self.our_constraints = set(constraints)
        self.their_constraints = set()
        self.unsafe_constraints = set()

    @property
    def constraints(self):
        return set(
            self._group_constraints(chain(self.our_constraints, self.their_constraints))
        )

    def resolve_hashes(self, ireqs):
        """
        Finds acceptable hashes for all of the given InstallRequirements.

        Returns a dict mapping each ireq to whatever the repository's
        ``get_hashes`` returns for it.
        """
        log.debug("")
        log.debug("Generating hashes:")
        hashes_by_ireq = {}
        # Hash lookup runs with the repository's ``allow_all_wheels()``
        # context active; log lines emitted meanwhile are indented.
        with self.repository.allow_all_wheels(), log.indentation():
            for ireq in ireqs:
                hashes_by_ireq[ireq] = self.repository.get_hashes(ireq)
        return hashes_by_ireq

    def resolve(self, max_rounds=10):
        """
        Finds concrete package versions for all the given InstallRequirements
        and their recursive dependencies.  The end result is a set of the
        best-match InstallRequirements that are not pip constraints.

        Resolves constraints one round at a time, until they don't change
        anymore.  Protects against infinite loops by raising RuntimeError
        after ``max_rounds`` rounds without reaching a stable result.

        Unless ``allow_unsafe`` is set, unsafe packages (and packages that
        are only required by unsafe packages) are removed from the result
        and collected in ``self.unsafe_constraints`` instead.
        """
        if self.clear_caches:
            self.dependency_cache.clear()
            self.repository.clear_caches()

        # Ignore existing packages
        # NOTE: str() wrapping necessary for Python 2/3 compat
        with update_env_context_manager(PIP_EXISTS_ACTION=str("i")):
            for current_round in count(start=1):  # pragma: no branch
                if current_round > max_rounds:
                    raise RuntimeError(
                        "No stable configuration of concrete packages "
                        "could be found for the given constraints after "
                        "{max_rounds} rounds of resolving.\n"
                        "This is likely a bug.".format(max_rounds=max_rounds)
                    )

                log.debug("")
                log.debug(magenta("{:^60}".format("ROUND {}".format(current_round))))
                # If a package version (foo==2.0) was built in a previous round,
                # and in this round a different version of foo needs to be built
                # (i.e. foo==1.0), the directory will exist already, which will
                # cause a pip build failure.  The trick is to start with a new
                # build cache dir for every round, so this can never happen.
                with self.repository.freshen_build_caches():
                    has_changed, best_matches = self._resolve_one_round()
                    log.debug("-" * 60)
                    log.debug(
                        "Result of round {}: {}".format(
                            current_round,
                            "not stable" if has_changed else "stable, done",
                        )
                    )
                if not has_changed:
                    break

        # Only include hard requirements and not pip constraints
        results = {req for req in best_matches if not req.constraint}

        # Filter out unsafe requirements.
        self.unsafe_constraints = set()
        if not self.allow_unsafe:
            # reverse_dependencies is used to filter out packages that are only
            # required by unsafe packages. This logic is incomplete, as it would
            # fail to filter sub-sub-dependencies of unsafe packages. None of the
            # UNSAFE_PACKAGES currently have any dependencies at all (which makes
            # sense for installation tools) so this seems sufficient.
            reverse_dependencies = self.reverse_dependencies(results)
            for req in results.copy():
                # Project names are case-insensitive, so use the same
                # lowercased name for both the reverse-dependency lookup and
                # the unsafe-package test.  The exact-name test is kept so
                # anything matched before is still matched.
                normalized_name = req.name.lower()
                required_by = reverse_dependencies.get(normalized_name, [])
                is_unsafe = (
                    req.name in UNSAFE_PACKAGES or normalized_name in UNSAFE_PACKAGES
                )
                only_required_by_unsafe = required_by and all(
                    name in UNSAFE_PACKAGES for name in required_by
                )
                if is_unsafe or only_required_by_unsafe:
                    self.unsafe_constraints.add(req)
                    results.remove(req)

        return results

    def _group_constraints(self, constraints):
        """
        Groups constraints (remember, InstallRequirements!) by their key name,
        and combining their SpecifierSets into a single InstallRequirement per
        package.  For example, given the following constraints:

            Django<1.9,>=1.4.2
            django~=1.5
            Flask~=0.7

        This will be combined into a single entry per package:

            django~=1.5,<1.9,>=1.4.2
            flask~=0.7

        """
        constraints = list(constraints)
        for ireq in constraints:
            if ireq.name is None:
                # get_dependencies has side-effect of assigning name to ireq
                # (so we can group by the name below).
                self.repository.get_dependencies(ireq)

        # Sort first by name, i.e. the groupby key. Then within each group,
        # sort editables first.
        # This way, we don't bother with combining editables, since the first
        # ireq will be editable, if one exists.
        for _, ireqs in groupby(
            sorted(constraints, key=(lambda x: (key_from_ireq(x), not x.editable))),
            key=key_from_ireq,
        ):
            yield combine_install_requirements(self.repository, ireqs)

    def _resolve_one_round(self):
        """
        Runs a single resolution round: picks the best match for every
        current constraint, gathers the dependencies of those matches, and
        stores them as the new secondary constraints.

        Returns a ``(has_changed, best_matches)`` tuple.  ``has_changed`` is
        true when secondary constraints were added or removed compared with
        the previous round; false indicates a stable configuration.
        """
        # Sorted purely so the terminal output is easy to read.
        ordered_constraints = sorted(self.constraints, key=key_from_ireq)

        log.debug("Current constraints:")
        with log.indentation():
            for ireq in ordered_constraints:
                log.debug(str(ireq))

        log.debug("")
        log.debug("Finding the best candidates:")
        with log.indentation():
            best_matches = {self.get_best_match(ireq) for ireq in ordered_constraints}

        # Collect the secondary dependencies of every chosen candidate.
        log.debug("")
        log.debug("Finding secondary dependencies:")

        discovered = []
        with log.indentation():
            for match in best_matches:
                discovered.extend(self._iter_dependencies(match))
        # Group them so rounds can be diffed package by package.
        theirs = set(self._group_constraints(discovered))

        # NOTE: InstallRequirement does not define equality, so the
        # comparison is done on RequirementSummary objects.
        current_summaries = {RequirementSummary(t) for t in theirs}
        previous_summaries = {RequirementSummary(t) for t in self.their_constraints}
        diff = current_summaries - previous_summaries
        removed = previous_summaries - current_summaries

        has_changed = bool(diff) or bool(removed)
        if has_changed:
            log.debug("")
            log.debug("New dependencies found in this round:")
            with log.indentation():
                for new_dependency in sorted(diff, key=key_from_ireq):
                    log.debug("adding {}".format(new_dependency))
            log.debug("Removed dependencies in this round:")
            with log.indentation():
                for removed_dependency in sorted(removed, key=key_from_ireq):
                    log.debug("removing {}".format(removed_dependency))

        # Remember this round's result for the next comparison.
        self.their_constraints = theirs
        return has_changed, best_matches

    def get_best_match(self, ireq):
        """
        Returns the InstallRequirement (pinned or editable) that is the best
        match to use for the given InstallRequirement.

        Example:
        Given the constraint Flask>=0.10, may return Flask==0.10.1 at
        a certain moment in time.

        Pinned requirements will always return themselves, i.e.

            Flask==0.10.1 => Flask==0.10.1

        """
        # Editable, URL and pinned requirements are already concrete, and a
        # pip constraint is not a requirement (yet).  In all of these cases
        # the ireq is returned unchanged, which is much quicker than hitting
        # the index server.
        resolves_to_itself = (
            ireq.editable
            or is_url_requirement(ireq)
            or is_pinned_requirement(ireq)
            or ireq.constraint
        )
        if resolves_to_itself:
            best_match = ireq
        else:
            best_match = self.repository.find_best_match(
                ireq, prereleases=self.prereleases
            )

        # Report the chosen candidate next to the constraint it satisfies.
        message = "found candidate {} (constraint was {})".format(
            format_requirement(best_match), format_specifier(ireq)
        )
        log.debug(message)

        # Carry the provenance of the constraint over to the match.
        best_match.comes_from = ireq.comes_from
        if hasattr(ireq, "_source_ireqs"):
            best_match._source_ireqs = ireq._source_ireqs
        return best_match

    def _iter_dependencies(self, ireq):
        """
        Given a pinned, url, or editable InstallRequirement, collects all the
        secondary dependencies for them, either by looking them up in a local
        cache, or by reaching out to the repository.

        Editable requirements will never be looked up, as they may have
        changed at any time.
        """
        # Pip does not resolve dependencies of constraints. We skip handling
        # constraints here as well to prevent the cache from being polluted.
        # Constraints that are later determined to be dependencies will be
        # marked as non-constraints in later rounds by
        # `combine_install_requirements`, and will be properly resolved.
        # See https://github.com/pypa/pip/
        # blob/6896dfcd831330c13e076a74624d95fa55ff53f4/src/pip/_internal/
        # legacy_resolve.py#L325
        if ireq.constraint:
            return

        if ireq.editable or is_url_requirement(ireq):
            for dependency in self.repository.get_dependencies(ireq):
                yield dependency
            return
        elif not is_pinned_requirement(ireq):
            raise TypeError(
                "Expected pinned or editable requirement, got {}".format(ireq)
            )

        # Now, either get the dependencies from the dependency cache (for
        # speed), or reach out to the external repository to
        # download and inspect the package version and get dependencies
        # from there
        if ireq not in self.dependency_cache:
            log.debug(
                "{} not in cache, need to check index".format(format_requirement(ireq)),
                fg="yellow",
            )
            dependencies = self.repository.get_dependencies(ireq)
            self.dependency_cache[ireq] = sorted(str(ireq.req) for ireq in dependencies)

        # Example: ['Werkzeug>=0.9', 'Jinja2>=2.4']
        dependency_strings = self.dependency_cache[ireq]
        log.debug(
            "{:25} requires {}".format(
                format_requirement(ireq),
                ", ".join(sorted(dependency_strings, key=lambda s: s.lower())) or "-",
            )
        )
        for dependency_string in dependency_strings:
            yield install_req_from_line(
                dependency_string, constraint=ireq.constraint, comes_from=ireq
            )

    def reverse_dependencies(self, ireqs):
        non_editable = [
            ireq for ireq in ireqs if not (ireq.editable or is_url_requirement(ireq))
        ]
        return self.dependency_cache.reverse_dependencies(non_editable)