summaryrefslogtreecommitdiffstats
path: root/python/mozbuild/mozbuild/vendor/vendor_rust.py
blob: f87d2efde8a5937285e350f71f136c9cd59a4f38 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, # You can obtain one at http://mozilla.org/MPL/2.0/.

import errno
import hashlib
import json
import logging
import os
import re
import subprocess
import typing
from collections import defaultdict
from itertools import dropwhile
from pathlib import Path

import mozpack.path as mozpath
import toml
from looseversion import LooseVersion
from mozboot.util import MINIMUM_RUST_VERSION

from mozbuild.base import BuildEnvironmentNotFoundException, MozbuildObject

if typing.TYPE_CHECKING:
    import datetime

# Type of a TOML value.
#
# The alias is recursive: lists and tables (dicts) hold further TomlItem
# values. The datetime members are written as strings because `datetime` is
# only imported under `typing.TYPE_CHECKING`, so the name does not exist at
# runtime.
TomlItem = typing.Union[
    str,
    typing.List["TomlItem"],
    typing.Dict[str, "TomlItem"],
    bool,
    int,
    float,
    "datetime.datetime",
    "datetime.date",
    "datetime.time",
]


# Template for a cargo configuration file. It contains three replacement
# fields: {config}, {replace_name} and {directory}.
# NOTE(review): the code filling them in is not part of this section of the
# file; presumably they are substituted with `str.format` -- confirm there.
# The `#define` / `#ifndef` / `#filter` lines are preprocessor directives
# that cargo reads as plain comments, as the template text itself explains.
CARGO_CONFIG_TEMPLATE = """\
# This file contains vendoring instructions for cargo.
# It was generated by `mach vendor rust`.
# Please do not edit.

{config}

# Take advantage of the fact that cargo will treat lines starting with #
# as comments to add preprocessing directives. This file can thus by copied
# as-is to $topsrcdir/.cargo/config with no preprocessing to be used there
# (for e.g. independent tasks building rust code), or be preprocessed by
# the build system to produce a .cargo/config with the right content.
#define REPLACE_NAME {replace_name}
#define VENDORED_DIRECTORY {directory}
# We explicitly exclude the following section when preprocessing because
# it would overlap with the preprocessed [source."@REPLACE_NAME@"], and
# cargo would fail.
#ifndef REPLACE_NAME
[source.{replace_name}]
directory = "{directory}"
#endif

# Thankfully, @REPLACE_NAME@ is unlikely to be a legitimate source, so
# cargo will ignore it when it's here verbatim.
#filter substitution
[source."@REPLACE_NAME@"]
directory = "@top_srcdir@/@VENDORED_DIRECTORY@"
"""


# User-facing reminder explaining how to restore Cargo.lock with git or hg.
# NOTE(review): the place where it is emitted is not part of this section of
# the file.
CARGO_LOCK_NOTICE = """
NOTE: `cargo vendor` may have made changes to your Cargo.lock. To restore your
Cargo.lock to the HEAD version, run `git checkout -- Cargo.lock` or
`hg revert Cargo.lock`.
"""


# Shared explanation used for every windows-related entry of
# PACKAGES_WE_DONT_WANT below.
WINDOWS_UNDESIRABLE_REASON = """\
The windows and windows-sys crates and their dependencies are too big to \
vendor, and is a risk of version duplication due to its current update \
cadence. Until this is worked out with upstream, we prefer to avoid them.\
"""

# Crate names that must not show up in Cargo.lock, mapped to the reason that
# VendorRust.vendor() reports in its "undesirable" error message.
PACKAGES_WE_DONT_WANT = {
    "windows-sys": WINDOWS_UNDESIRABLE_REASON,
    "windows": WINDOWS_UNDESIRABLE_REASON,
    "windows_aarch64_msvc": WINDOWS_UNDESIRABLE_REASON,
    "windows_i686_gnu": WINDOWS_UNDESIRABLE_REASON,
    "windows_i686_msvc": WINDOWS_UNDESIRABLE_REASON,
    "windows_x86_64_gnu": WINDOWS_UNDESIRABLE_REASON,
    "windows_x86_64_msvc": WINDOWS_UNDESIRABLE_REASON,
}

# Crates that must always be overridden. VendorRust.vendor() logs a
# "non_overridden" error for any Cargo.lock package with one of these names
# that carries a `source` entry.
PACKAGES_WE_ALWAYS_WANT_AN_OVERRIDE_OF = [
    "autocfg",
    "cmake",
    "vcpkg",
]


# Historically duplicated crates. Eventually we want this list to be empty.
# If you do need to make changes increasing the number of duplicates, please
# add a comment as to why.
#
# Maps a crate name to the number of versions VendorRust.vendor() expects for
# it in Cargo.lock; crates that are not listed are expected exactly once.
TOLERATED_DUPES = {
    "mio": 2,
    # Transition from time 0.1 to 0.3 underway, but chrono is stuck on 0.1
    # and hasn't been updated in 1.5 years (a hypothetical update is
    # expected to remove the dependency on time altogether).
    "time": 2,
}


class VendorRust(MozbuildObject):
    def __init__(self, *args, **kwargs):
        """Set up the base object and start with an empty list of issues.

        All positional and keyword arguments are forwarded unchanged to the
        parent class constructor.
        """
        super().__init__(*args, **kwargs)
        # (level, formatted message) pairs appended by log() for messages at
        # WARNING level or above; read back by serialize_issues_json().
        self._issues: typing.List[tuple] = []

    def serialize_issues_json(self):
        return json.dumps(
            {
                "Cargo.lock": [
                    {
                        "path": "Cargo.lock",
                        "column": None,
                        "line": None,
                        "level": "error" if level == logging.ERROR else "warning",
                        "message": msg,
                    }
                    for (level, msg) in self._issues
                ]
            }
        )

    def log(self, level, action, params, format_str):
        """Log a message and remember it as an issue if it is a warning or worse.

        Messages whose ``level`` is at least ``logging.WARNING`` are formatted
        with ``params`` and stored in ``self._issues`` together with their
        level. Every message, whatever its level, is then handed to the
        parent class ``log`` with the same arguments.
        """
        records_issue = level >= logging.WARNING
        if records_issue:
            rendered = format_str.format(**params)
            self._issues.append((level, rendered))
        super().log(level, action, params, format_str)

    def get_cargo_path(self):
        try:
            return self.substs["CARGO"]
        except (BuildEnvironmentNotFoundException, KeyError):
            if "MOZ_AUTOMATION" in os.environ:
                cargo = os.path.join(
                    os.environ["MOZ_FETCHES_DIR"], "rustc", "bin", "cargo"
                )
                assert os.path.exists(cargo)
                return cargo
            # Default if this tree isn't configured.
            from mozfile import which

            cargo = which("cargo")
            if not cargo:
                raise OSError(
                    errno.ENOENT,
                    (
                        "Could not find 'cargo' on your $PATH. "
                        "Hint: have you run `mach build` or `mach configure`?"
                    ),
                )
            return cargo

    def check_cargo_version(self, cargo):
        """
        Ensure that Cargo is new enough.

        Runs ``cargo --version`` and compares the reported version against
        the larger of ``MINIMUM_RUST_VERSION`` and 1.68.0. Returns True when
        cargo is recent enough. Returns False when the first output line does
        not start with "cargo", or when the version is too old (in which case
        an error is logged as well).
        """
        raw_output = subprocess.check_output([cargo, "--version"])
        first_line = raw_output.splitlines()[0].decode("UTF-8")
        if not first_line.startswith("cargo"):
            return False
        # The second whitespace-separated word of the line is the version.
        found = LooseVersion(first_line.split()[1])
        # Cargo 1.68.0 changed vendoring in a way that creates a lot of noise
        # if we go back and forth between vendoring with an older version and
        # a newer version. Only allow the newer versions.
        if LooseVersion("1.68.0") >= MINIMUM_RUST_VERSION:
            required = "1.68.0"
        else:
            required = MINIMUM_RUST_VERSION
        if found < required:
            message = "Cargo >= {0} required (install Rust {0} or newer)".format(
                required
            )
            self.log(logging.ERROR, "cargo_version", {}, message)
            return False
        self.log(logging.DEBUG, "cargo_version", {}, "cargo is new enough")
        return True

    def has_modified_files(self):
        """
        Ensure that there aren't any uncommitted changes to files
        in the working copy, since we're going to change some state
        on the user. Allow changes to Cargo.{toml,lock} since that's
        likely to be a common use case.
        """
        modified = [
            f
            for f in self.repository.get_changed_files("M")
            if os.path.basename(f) not in ("Cargo.toml", "Cargo.lock")
            and not f.startswith("supply-chain/")
        ]
        if modified:
            self.log(
                logging.ERROR,
                "modified_files",
                {},
                """You have uncommitted changes to the following files:

{files}

Please commit or stash these changes before vendoring, or re-run with `--ignore-modified`.
""".format(
                    files="\n".join(sorted(modified))
                ),
            )
        return modified

    def check_openssl(self):
        """
        Work out the environment variables needed to build against openssl.

        MacOS doesn't include openssl, but the openssl-sys crate used by
        mach-vendor expects one on the system. It's common to have one
        installed in /usr/local/opt/openssl by homebrew, but custom link
        flags are necessary to build against it.

        Returns None when openssl/ssl.h exists under /usr/include or
        /usr/local/include, a dict with OPENSSL_INCLUDE_DIR and
        OPENSSL_LIB_DIR when the header exists under /usr/local/opt/openssl,
        and None (after logging an error) when no header is found at all.
        """
        for include_dir in ("/usr/include", "/usr/local/include"):
            if os.path.exists(os.path.join(include_dir, "openssl/ssl.h")):
                # Assume we can use one of these system headers.
                return None

        if not os.path.exists("/usr/local/opt/openssl/include/openssl/ssl.h"):
            self.log(logging.ERROR, "openssl", {}, "OpenSSL not found!")
            return None

        # Found a likely homebrew install.
        self.log(
            logging.INFO, "openssl", {}, "Using OpenSSL in /usr/local/opt/openssl"
        )
        return {
            "OPENSSL_INCLUDE_DIR": "/usr/local/opt/openssl/include",
            "OPENSSL_LIB_DIR": "/usr/local/opt/openssl/lib",
        }

    def _ensure_cargo(self):
        """
        Ensures all the necessary cargo bits are installed.

        Returns the path to cargo if successful, None otherwise.
        """
        cargo = self.get_cargo_path()
        if not self.check_cargo_version(cargo):
            return None
        return cargo

    # A whitelist of acceptable license identifiers for the
    # packages.license field from https://spdx.org/licenses/.  Cargo
    # documentation claims that values are checked against the above
    # list and that multiple entries can be separated by '/'.  We
    # choose to list all combinations instead for the sake of
    # completeness and because some entries below obviously do not
    # conform to the format prescribed in the documentation.
    #
    # It is insufficient to have additions to this whitelist reviewed
    # solely by a build peer; any additions must be checked by somebody
    # competent to review licensing minutiae.

    # Licenses for code used at runtime. Please see the above comment before
    # adding anything to this list.
    #
    # Consulted by runtime_license(): a license expression is accepted as
    # soon as one of its `/`- or OR-separated alternatives is listed here.
    RUNTIME_LICENSE_WHITELIST = [
        "Apache-2.0",
        "Apache-2.0 WITH LLVM-exception",
        # BSD-2-Clause and BSD-3-Clause are ok, but packages using them
        # must be added to the appropriate section of about:licenses.
        # To encourage people to remember to do that, we do not whitelist
        # the licenses themselves, and we require the packages to be added
        # to RUNTIME_LICENSE_PACKAGE_WHITELIST below.
        "CC0-1.0",
        "ISC",
        "MIT",
        "MPL-2.0",
        "Unicode-DFS-2016",
        "Unlicense",
        "Zlib",
    ]

    # Licenses for code used at build time (e.g. code generators). Please see the above
    # comments before adding anything to this list.
    #
    # Maps a license string to the packages allowed to use it. It is only
    # consulted by _check_licenses() after runtime_license() has rejected the
    # package's license.
    BUILDTIME_LICENSE_WHITELIST = {
        "BSD-3-Clause": [
            "bindgen",
            "fuchsia-zircon",
            "fuchsia-zircon-sys",
            "fuchsia-cprng",
            "glsl",
            "instant",
        ]
    }

    # This whitelist should only be used for packages that use an acceptable
    # license, but that also need to be explicitly mentioned in about:license.
    #
    # Maps a license identifier to the packages approved for it; consulted by
    # runtime_license() for each alternative of a license expression.
    RUNTIME_LICENSE_PACKAGE_WHITELIST = {
        "BSD-2-Clause": [
            "arrayref",
            "cloudabi",
            "Inflector",
            "mach",
            "qlog",
        ],
        "BSD-3-Clause": [],
    }

    # ICU4X is distributed as individual crates that all share the same LICENSE
    # that will need to be individually added to the allow list below. We'll
    # define the SHA256 once here, to make the review process easier as new
    # ICU4X crates are vendored into the tree.
    ICU4X_LICENSE_SHA256 = (
        "02420cc1b4c26d9a3318d60fd57048d015831249a5b776a1ada75cd227e78630"
    )

    # This whitelist should only be used for packages that use a
    # license-file and for which the license-file entry has been
    # reviewed.  The table is keyed by package names and maps to the
    # sha256 hash of the license file that we reviewed.
    #
    # As above, it is insufficient to have additions to this whitelist
    # reviewed solely by a build peer; any additions must be checked by
    # somebody competent to review licensing minutiae.
    #
    # _check_licenses() hashes the vendored license file of a listed package
    # and rejects the package when the digest differs from the value here.
    RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST = {
        # MIT
        "deque": "6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb",
        # we're whitelisting this fuchsia crate because it doesn't get built in the final
        # product but has a license-file that needs ignoring
        "fuchsia-cprng": "03b114f53e6587a398931762ee11e2395bfdba252a329940e2c8c9e81813845b",
        # Old ICU4X crates for ICU4X 1.0, see comment above.
        "yoke-derive": ICU4X_LICENSE_SHA256,
        "zerofrom-derive": ICU4X_LICENSE_SHA256,
    }

    @staticmethod
    def runtime_license(package, license_string):
        """Cargo docs say:
        ---
        https://doc.rust-lang.org/cargo/reference/manifest.html

        This is an SPDX 2.1 license expression for this package.  Currently
        crates.io will validate the license provided against a whitelist of
        known license and exception identifiers from the SPDX license list
        2.4.  Parentheses are not currently supported.

        Multiple licenses can be separated with a `/`, although that usage
        is deprecated.  Instead, use a license expression with AND and OR
        operators to get more explicit semantics.
        ---
        But I have no idea how you can meaningfully AND licenses, so
        we will abort if that is detected. We'll handle `/` and OR as
        equivalent and approve is any is in our approved list."""

        # This specific AND combination has been reviewed for encoding_rs.
        if (
            license_string == "(Apache-2.0 OR MIT) AND BSD-3-Clause"
            and package == "encoding_rs"
        ):
            return True

        # This specific AND combination has been reviewed for unicode-ident.
        if (
            license_string == "(MIT OR Apache-2.0) AND Unicode-DFS-2016"
            and package == "unicode-ident"
        ):
            return True

        if re.search(r"\s+AND", license_string):
            return False

        license_list = re.split(r"\s*/\s*|\s+OR\s+", license_string)
        for license in license_list:
            if license in VendorRust.RUNTIME_LICENSE_WHITELIST:
                return True
            if package in VendorRust.RUNTIME_LICENSE_PACKAGE_WHITELIST.get(license, []):
                return True
        return False

    def _check_licenses(self, vendor_dir: str) -> bool:
        """Check the license of every crate directory found in ``vendor_dir``.

        Each sub-directory of ``vendor_dir`` is treated as a package whose
        ``Cargo.toml`` is parsed for its ``license`` / ``license-file``
        fields. Every problem is logged at ERROR level.

        Returns True only when all packages passed the check.
        """

        def verify_acceptable_license(package: str, license: str) -> bool:
            """Return whether ``license`` is approved for ``package``.

            The license is accepted if runtime_license() approves it, or if
            it is a build-time license for which ``package`` is listed in
            BUILDTIME_LICENSE_WHITELIST. Rejections are logged.
            """
            self.log(
                logging.DEBUG, "package_license", {}, "has license {}".format(license)
            )

            if not self.runtime_license(package, license):
                # Not fine for runtime code; fall back to the build-time list.
                if license not in self.BUILDTIME_LICENSE_WHITELIST:
                    self.log(
                        logging.ERROR,
                        "package_license_error",
                        {},
                        """Package {} has a non-approved license: {}.

    Please request license review on the package's license.  If the package's license
    is approved, please add it to the whitelist of suitable licenses.
    """.format(
                            package, license
                        ),
                    )
                    return False
                elif package not in self.BUILDTIME_LICENSE_WHITELIST[license]:
                    self.log(
                        logging.ERROR,
                        "package_license_error",
                        {},
                        """Package {} has a license that is approved for build-time dependencies:
    {}
    but the package itself is not whitelisted as being a build-time only package.

    If your package is build-time only, please add it to the whitelist of build-time
    only packages. Otherwise, you need to request license review on the package's license.
    If the package's license is approved, please add it to the whitelist of suitable licenses.
    """.format(
                            package, license
                        ),
                    )
                    return False
            return True

        def check_package(package_name: str) -> bool:
            """Return whether the package in ``vendor_dir/package_name`` is ok.

            Reads the package's ``Cargo.toml`` and validates either its
            ``license`` string or the hash of its reviewed ``license-file``.
            """
            self.log(
                logging.DEBUG,
                "package_check",
                {},
                "Checking license for {}".format(package_name),
            )

            toml_file = os.path.join(vendor_dir, package_name, "Cargo.toml")
            with open(toml_file, encoding="utf-8") as fh:
                toml_data = toml.load(fh)

            package_entry: typing.Dict[str, TomlItem] = toml_data["package"]
            license = package_entry.get("license", None)
            license_file = package_entry.get("license-file", None)

            # Both fields, when present, have to be plain strings.
            if license is not None and type(license) is not str:
                self.log(
                    logging.ERROR,
                    "package_invalid_license_format",
                    {},
                    "package {} has an invalid `license` field (expected a string)".format(
                        package_name
                    ),
                )
                return False

            if license_file is not None and type(license_file) is not str:
                self.log(
                    logging.ERROR,
                    "package_invalid_license_format",
                    {},
                    "package {} has an invalid `license-file` field (expected a string)".format(
                        package_name
                    ),
                )
                return False

            # License information is optional for crates to provide, but
            # we require it.
            if not license and not license_file:
                self.log(
                    logging.ERROR,
                    "package_no_license",
                    {},
                    "package {} does not provide a license".format(package_name),
                )
                return False

            # The Cargo.toml spec suggests that crates should either have
            # `license` or `license-file`, but not both.  We might as well
            # be defensive about that, though.
            if license and license_file:
                self.log(
                    logging.ERROR,
                    "package_many_licenses",
                    {},
                    "package {} provides too many licenses".format(package_name),
                )
                return False

            if license:
                return verify_acceptable_license(package_name, license)

            # otherwise, it's a custom license in a separate file
            assert license_file is not None
            self.log(
                logging.DEBUG,
                "package_license_file",
                {},
                "package has license-file {}".format(license_file),
            )

            # Only packages whose license file has been reviewed are allowed.
            if package_name not in self.RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST:
                self.log(
                    logging.ERROR,
                    "package_license_file_unknown",
                    {},
                    """Package {} has an unreviewed license file: {}.

Please request review on the provided license; if approved, the package can be added
to the whitelist of packages whose licenses are suitable.
""".format(
                        package_name, license_file
                    ),
                )
                return False

            approved_hash = self.RUNTIME_LICENSE_FILE_PACKAGE_WHITELIST[package_name]

            # Hash the vendored license file (read as bytes) so that any
            # change since the review is detected.
            with open(
                os.path.join(vendor_dir, package_name, license_file), "rb"
            ) as license_buf:
                current_hash = hashlib.sha256(license_buf.read()).hexdigest()

            if current_hash != approved_hash:
                self.log(
                    logging.ERROR,
                    "package_license_file_mismatch",
                    {},
                    """Package {} has changed its license file: {} (hash {}).

Please request review on the provided license; if approved, please update the
license file's hash.
""".format(
                        package_name, license_file, current_hash
                    ),
                )
                return False
            return True

        # Force all of the packages to be checked for license information
        # before reducing via `all`, so all license issues are found in a
        # single `mach vendor rust` invocation.
        results = [
            check_package(p)
            for p in os.listdir(vendor_dir)
            if os.path.isdir(os.path.join(vendor_dir, p))
        ]
        return all(results)

    def _check_build_rust(self, cargo_lock):
        """Check that the crates under build/rust are unique and actually used.

        Every ``Cargo.toml`` below ``<topsrcdir>/build/rust`` is read and
        indexed by its package (name, version). An error is logged when:

        * two manifests declare the same (name, version), or
        * a manifest's (name, version) has no matching package without a
          ``source`` key in ``cargo_lock["package"]``, i.e. the override is
          not used.

        ``cargo_lock`` is the parsed content of Cargo.lock.

        Returns True when no error was logged, False otherwise.
        """
        ret = True
        crates = {}
        for manifest in Path(self.topsrcdir).glob("build/rust/**/Cargo.toml"):
            # TOML is UTF-8 by specification, so don't rely on the locale's
            # default encoding (this also matches how _check_licenses reads
            # the vendored manifests).
            with open(manifest, encoding="utf-8") as fh:
                cargo_toml = toml.load(fh)
            path = manifest.relative_to(self.topsrcdir)
            package = cargo_toml["package"]
            key = (package["name"], package["version"])
            if key in crates:
                self.log(
                    logging.ERROR,
                    "build_rust",
                    {
                        "path": crates[key],
                        "path2": path,
                        "crate": key[0],
                        "version": key[1],
                    },
                    "{path} and {path2} both contain {crate} {version}",
                )
                ret = False
            crates[key] = path

        # A package without `source` in Cargo.lock matching one of the
        # manifests means that override is in use; forget about it.
        for package in cargo_lock["package"]:
            key = (package["name"], package["version"])
            if key in crates and "source" not in package:
                crates.pop(key)

        # Whatever is left was never picked up by Cargo.lock.
        for (name, version), path in crates.items():
            self.log(
                logging.ERROR,
                "build_rust",
                {"path": path, "crate": name, "version": version},
                "{crate} {version} has an override in {path} that is not used",
            )
            ret = False
        return ret

    def vendor(
        self, ignore_modified=False, build_peers_said_large_imports_were_ok=False
    ):
        from mozbuild.mach_commands import cargo_vet

        self.populate_logger()
        self.log_manager.enable_unstructured()
        if not ignore_modified and self.has_modified_files():
            return False

        cargo = self._ensure_cargo()
        if not cargo:
            self.log(logging.ERROR, "cargo_not_found", {}, "Cargo was not found.")
            return False

        relative_vendor_dir = "third_party/rust"
        vendor_dir = mozpath.join(self.topsrcdir, relative_vendor_dir)

        # We use check_call instead of mozprocess to ensure errors are displayed.
        # We do an |update -p| here to regenerate the Cargo.lock file with minimal
        # changes. See bug 1324462
        res = subprocess.run([cargo, "update", "-p", "gkrust"], cwd=self.topsrcdir)
        if res.returncode:
            self.log(logging.ERROR, "cargo_update_failed", {}, "Cargo update failed.")
            return False

        with open(os.path.join(self.topsrcdir, "Cargo.lock")) as fh:
            cargo_lock = toml.load(fh)
            failed = False
            for package in cargo_lock.get("patch", {}).get("unused", []):
                self.log(
                    logging.ERROR,
                    "unused_patch",
                    {"crate": package["name"]},
                    """Unused patch in top-level Cargo.toml for {crate}.""",
                )
                failed = True

            if not self._check_build_rust(cargo_lock):
                failed = True

            grouped = defaultdict(list)
            for package in cargo_lock["package"]:
                if package["name"] in PACKAGES_WE_ALWAYS_WANT_AN_OVERRIDE_OF:
                    # When the in-tree version is used, there is `source` for
                    # it in Cargo.lock, which is what we expect.
                    if package.get("source"):
                        self.log(
                            logging.ERROR,
                            "non_overridden",
                            {
                                "crate": package["name"],
                                "version": package["version"],
                                "source": package["source"],
                            },
                            "Crate {crate} v{version} must be overridden but isn't "
                            "and comes from {source}.",
                        )
                        failed = True
                elif package["name"] in PACKAGES_WE_DONT_WANT:
                    self.log(
                        logging.ERROR,
                        "undesirable",
                        {
                            "crate": package["name"],
                            "version": package["version"],
                            "reason": PACKAGES_WE_DONT_WANT[package["name"]],
                        },
                        "Crate {crate} is not desirable: {reason}",
                    )
                    failed = True
                grouped[package["name"]].append(package)

            for name, packages in grouped.items():
                # Allow to have crates of the same name when one depends on the other.
                num = len(
                    [
                        p
                        for p in packages
                        if all(d.split()[0] != name for d in p.get("dependencies", []))
                    ]
                )
                expected = TOLERATED_DUPES.get(name, 1)
                if num > expected:
                    self.log(
                        logging.ERROR,
                        "duplicate_crate",
                        {
                            "crate": name,
                            "num": num,
                            "expected": expected,
                            "file": Path(__file__).relative_to(self.topsrcdir),
                        },
                        "There are {num} different versions of crate {crate} "
                        "(expected {expected}). Please avoid the extra duplication "
                        "or adjust TOLERATED_DUPES in {file} if not possible "
                        "(but we'd prefer the former).",
                    )
                    failed = True
                elif num < expected and num > 1:
                    self.log(
                        logging.ERROR,
                        "less_duplicate_crate",
                        {
                            "crate": name,
                            "num": num,
                            "expected": expected,
                            "file": Path(__file__).relative_to(self.topsrcdir),
                        },
                        "There are {num} different versions of crate {crate} "
                        "(expected {expected}). Please adjust TOLERATED_DUPES in "
                        "{file} to reflect this improvement.",
                    )
                    failed = True
                elif num < expected and num > 0:
                    self.log(
                        logging.ERROR,
                        "less_duplicate_crate",
                        {
                            "crate": name,
                            "file": Path(__file__).relative_to(self.topsrcdir),
                        },
                        "Crate {crate} is not duplicated anymore. "
                        "Please adjust TOLERATED_DUPES in {file} to reflect this improvement.",
                    )
                    failed = True
                elif name in TOLERATED_DUPES and expected <= 1:
                    self.log(
                        logging.ERROR,
                        "broken_allowed_dupes",
                        {
                            "crate": name,
                            "file": Path(__file__).relative_to(self.topsrcdir),
                        },
                        "Crate {crate} is not duplicated. Remove it from "
                        "TOLERATED_DUPES in {file}.",
                    )
                    failed = True

            for name in TOLERATED_DUPES:
                if name not in grouped:
                    self.log(
                        logging.ERROR,
                        "outdated_allowed_dupes",
                        {
                            "crate": name,
                            "file": Path(__file__).relative_to(self.topsrcdir),
                        },
                        "Crate {crate} is not in Cargo.lock anymore. Remove it from "
                        "TOLERATED_DUPES in {file}.",
                    )
                    failed = True

        # Run `cargo vet`; any failure is logged as an error and aborts vendoring.
        env = os.environ.copy()
        env["PATH"] = os.pathsep.join(
            (
                str(Path(cargo).parent),
                os.environ["PATH"],
            )
        )
        flags = ["--output-format=json"]
        if "MOZ_AUTOMATION" in os.environ:
            flags.append("--locked")
            flags.append("--frozen")
        res = cargo_vet(
            self,
            flags,
            stdout=subprocess.PIPE,
            env=env,
        )
        if res.returncode:
            vet = json.loads(res.stdout)
            logged_error = False
            for failure in vet.get("failures", []):
                failure["crate"] = failure.pop("name")
                self.log(
                    logging.ERROR,
                    "cargo_vet_failed",
                    failure,
                    "Missing audit for {crate}:{version} (requires {missing_criteria})."
                    " Run `./mach cargo vet` for more information.",
                )
                logged_error = True
            # NOTE: This could log more information, but the violation JSON
            # output isn't super stable yet, so it's probably simpler to tell
            # the caller to run `./mach cargo vet` directly.
            for key in vet.get("violations", {}).keys():
                self.log(
                    logging.ERROR,
                    "cargo_vet_failed",
                    {"key": key},
                    "Violation conflict for {key}. Run `./mach cargo vet` for more information.",
                )
                logged_error = True
            if "error" in vet:
                # NOTE: The error format produced by cargo-vet is from the
                # `miette` crate, and can include a lot of metadata and context.
                # If we want to show more details in the future, we can expand
                # this rendering to also include things like source labels and
                # related error metadata.
                error = vet["error"]
                self.log(
                    logging.ERROR,
                    "cargo_vet_failed",
                    error,
                    "Vet {severity}: {message}",
                )
                if "help" in error:
                    self.log(logging.INFO, "cargo_vet_failed", error, " help: {help}")
                for cause in error.get("causes", []):
                    self.log(
                        logging.INFO,
                        "cargo_vet_failed",
                        {"cause": cause},
                        " cause: {cause}",
                    )
                for related in error.get("related", []):
                    self.log(
                        logging.INFO,
                        "cargo_vet_failed",
                        related,
                        " related {severity}: {message}",
                    )
                self.log(
                    logging.INFO,
                    "cargo_vet_failed",
                    {},
                    "Run `./mach cargo vet` for more information.",
                )
                logged_error = True
            if not logged_error:
                self.log(
                    logging.ERROR,
                    "cargo_vet_failed",
                    {},
                    "Unknown vet error. Run `./mach cargo vet` for more information.",
                )
            failed = True

        # If we failed when checking the crates list and/or running `cargo vet`,
        # stop before invoking `cargo vendor`.
        if failed:
            return False

        res = subprocess.run(
            [cargo, "vendor", vendor_dir], cwd=self.topsrcdir, stdout=subprocess.PIPE
        )
        if res.returncode:
            self.log(logging.ERROR, "cargo_vendor_failed", {}, "Cargo vendor failed.")
            return False
        output = res.stdout.decode("UTF-8")

        # Get the snippet of configuration that cargo vendor outputs, and
        # update .cargo/config.in with it.
        # XXX(bug 1576765): Hopefully do something better after
        # https://github.com/rust-lang/cargo/issues/7280 is addressed.
        config = "\n".join(
            dropwhile(lambda l: not l.startswith("["), output.splitlines())
        )

        # The config is toml; parse it as such.
        config = toml.loads(config)

        # For each replace-with, extract their configuration and update the
        # corresponding directory to be relative to topsrcdir.
        replaces = {
            v["replace-with"] for v in config["source"].values() if "replace-with" in v
        }

        # We only really expect one replace-with
        if len(replaces) != 1:
            self.log(
                logging.ERROR,
                "vendor_failed",
                {},
                """cargo vendor didn't output a unique replace-with. Found: %s."""
                % replaces,
            )
            return False

        replace_name = replaces.pop()
        replace = config["source"].pop(replace_name)
        replace["directory"] = mozpath.relpath(
            mozpath.normsep(os.path.normcase(replace["directory"])),
            mozpath.normsep(os.path.normcase(self.topsrcdir)),
        )

        cargo_config = os.path.join(self.topsrcdir, ".cargo", "config.in")
        with open(cargo_config, "w", encoding="utf-8", newline="\n") as fh:
            fh.write(
                CARGO_CONFIG_TEMPLATE.format(
                    config=toml.dumps(config),
                    replace_name=replace_name,
                    directory=replace["directory"],
                )
            )

        if not self._check_licenses(vendor_dir):
            self.log(
                logging.ERROR,
                "license_check_failed",
                {},
                """The changes from `mach vendor rust` will NOT be added to version control.

{notice}""".format(
                    notice=CARGO_LOCK_NOTICE
                ),
            )
            self.repository.clean_directory(vendor_dir)
            return False

        self.repository.add_remove_files(vendor_dir)

        # 100k is a reasonable upper bound on source file size.
        FILESIZE_LIMIT = 100 * 1024
        large_files = set()
        cumulative_added_size = 0
        for f in self.repository.get_changed_files("A"):
            path = mozpath.join(self.topsrcdir, f)
            size = os.stat(path).st_size
            cumulative_added_size += size
            if size > FILESIZE_LIMIT:
                large_files.add(f)

        # Forcefully complain about large files being added, as history has
        # shown that large-ish files typically are not needed.
        if large_files and not build_peers_said_large_imports_were_ok:
            self.log(
                logging.ERROR,
                "filesize_check",
                {},
                """The following files exceed the filesize limit of {size}:

{files}

If you can't reduce the size of these files, talk to a build peer (on the #build
channel at https://chat.mozilla.org) about the particular large files you are
adding.

The changes from `mach vendor rust` will NOT be added to version control.

{notice}""".format(
                    files="\n".join(sorted(large_files)),
                    size=FILESIZE_LIMIT,
                    notice=CARGO_LOCK_NOTICE,
                ),
            )
            self.repository.forget_add_remove_files(vendor_dir)
            self.repository.clean_directory(vendor_dir)
            return False

        # Only warn for large imports, since we may just have large code
        # drops from time to time (e.g. importing features into m-c).
        SIZE_WARN_THRESHOLD = 5 * 1024 * 1024
        if cumulative_added_size >= SIZE_WARN_THRESHOLD:
            self.log(
                logging.WARN,
                "filesize_check",
                {},
                """Your changes add {size} bytes of added files.

Please consider finding ways to reduce the size of the vendored packages.
For instance, check the vendored packages for unusually large test or
benchmark files that don't need to be published to crates.io and submit
a pull request upstream to ignore those files when publishing.""".format(
                    size=cumulative_added_size
                ),
            )
        return True