diff options
Diffstat (limited to 'compiler/rustc_codegen_cranelift')
42 files changed, 1263 insertions, 462 deletions
diff --git a/compiler/rustc_codegen_cranelift/.github/workflows/abi-cafe.yml b/compiler/rustc_codegen_cranelift/.github/workflows/abi-cafe.yml index 12aa69d3c..bd3b05118 100644 --- a/compiler/rustc_codegen_cranelift/.github/workflows/abi-cafe.yml +++ b/compiler/rustc_codegen_cranelift/.github/workflows/abi-cafe.yml @@ -35,6 +35,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + if: matrix.os == 'ubuntu-latest' + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: diff --git a/compiler/rustc_codegen_cranelift/.github/workflows/main.yml b/compiler/rustc_codegen_cranelift/.github/workflows/main.yml index 47d9a3b93..05dc28d07 100644 --- a/compiler/rustc_codegen_cranelift/.github/workflows/main.yml +++ b/compiler/rustc_codegen_cranelift/.github/workflows/main.yml @@ -66,6 +66,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + if: matrix.os == 'ubuntu-latest' + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: @@ -136,6 +140,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Prepare dependencies run: ./y.sh prepare @@ -159,6 +166,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: diff --git a/compiler/rustc_codegen_cranelift/.github/workflows/rustc.yml b/compiler/rustc_codegen_cranelift/.github/workflows/rustc.yml index b49dc3aff..cb5dd51fe 100644 --- a/compiler/rustc_codegen_cranelift/.github/workflows/rustc.yml +++ b/compiler/rustc_codegen_cranelift/.github/workflows/rustc.yml @@ -11,6 +11,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: @@ -31,6 +34,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: CPU features + run: cat /proc/cpuinfo + - name: Cache cargo target dir uses: actions/cache@v3 with: diff --git a/compiler/rustc_codegen_cranelift/Cargo.lock b/compiler/rustc_codegen_cranelift/Cargo.lock index dcb6cc575..901d1dbea 100644 --- a/compiler/rustc_codegen_cranelift/Cargo.lock +++ b/compiler/rustc_codegen_cranelift/Cargo.lock @@ -21,9 +21,9 @@ checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "arbitrary" -version = "1.3.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" [[package]] name = "bitflags" @@ -45,18 +45,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cranelift-bforest" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f773437307980ac0f424bf9b9a5d0cd21a0f17248c6860c9a65bec8b5975f3fe" +checksum = "76eb38f2af690b5a4411d9a8782b6d77dabff3ca939e0518453ab9f9a4392d41" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443c2ac50e97fb7de1a0f862753fce3f27215558811a6fcee508eb0c3747fa79" +checksum = "39526c036b92912417e8931f52c1e235796688068d3efdbbd8b164f299d19156" dependencies = [ "bumpalo", "cranelift-bforest", @@ -75,39 +75,39 @@ dependencies = [ [[package]] name = "cranelift-codegen-meta" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5b174c411480c79ce0793c55042fa51bec27e486381d103a53cab3b480cb2db" +checksum = "fdb0deedc9fccf2db53a5a3c9c9d0163e44143b0d004dca9bf6ab6a0024cd79a" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73fa0151a528066a369de6debeea4d4b23a32aba68b5add8c46d3dc8091ff434" +checksum = "cea2d1b274e45aa8e61e9103efa1ba82d4b5a19d12bd1fd10744c3b7380ba3ff" [[package]] name = "cranelift-control" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8adf1e6398493c9bea1190e37d28a0eb0eca5fddbc80e01e506cda34db92b1f" +checksum = "6ea5977559a71e63db79a263f0e81a89b996e8a38212c4281e37dd1dbaa8b65c" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4917e2ed3bb5fe87d0ed88395ca6d644018d119a034faedd1f3e1f2c33cd52b2" +checksum = "2f871ada808b58158d84dfc43a6a2e2d2756baaf4ed1c51fd969ca8330e6ca5c" [[package]] name = "cranelift-frontend" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aaadf1e7cf28886bbf046eaf7ef538997bc8a7e020e578ea4957b39da87d5a1" +checksum = "e8e6890f587ef59824b3debe577e68fdf9b307b3808c54b8d93a18fd0b70941b" dependencies = [ "cranelift-codegen", "log", @@ -117,15 +117,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a67fda31b9d69eaa1c49a2081939454c45857596a9d45af6744680541c628b4c" +checksum = "a8d5fc6d5d3b52d1917002b17a8ecce448c2621b5bf394bb4e77e2f676893537" [[package]] name = "cranelift-jit" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bf32710628e7ff298739f1ed80a0bfdafc0c6a3e284c4540b23f18e8889d4b" +checksum = "e8a2d7744f743f59d9646d7589ad22ea17ed0d71e04906eb77c31e99bc13bd8b" dependencies = [ "anyhow", "cranelift-codegen", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d693e93a0fbf56b4bc93cffe6b107c2e52f070e1111950505fc8c83ac440b9d" +checksum = "b96cb196334698e612c197d7d0ae59af5e07667306ec20d7be414717db400873" dependencies = [ "anyhow", "cranelift-codegen", @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76fb52ba71be98312f35e798d9e98e45ab2586f27584231bf7c644fa9501e8af" +checksum = "3e10c2e7faa65d4ae7de9a83b44f2c31aca7dc638e17d0a79572fdf8103d720b" dependencies = [ "cranelift-codegen", "libc", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "cranelift-object" -version = "0.101.2" +version = "0.102.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2551b2e185022b89e9efa5e04c0f17f679b86ef73d9f7feabc48b608ff23120d" +checksum = "83ce94e18756058af8a66e3c0ba1123ae15517c72162d8060d0cb0974642adf2" dependencies = [ "anyhow", "cranelift-codegen", @@ -295,9 +295,9 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "regalloc2" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b4dcbd3a2ae7fb94b5813fa0e957c6ab51bf5d0a8ee1b69e0c2d0f1e6eb8485" +checksum = "ad156d539c879b7a24a363a2016d77961786e71f48f2e2fc8302a92abd2429a6" dependencies = [ "hashbrown 0.13.2", "log", @@ -374,9 +374,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasmtime-jit-icache-coherence" -version = "14.0.2" +version = "15.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0980a96b16abbdaf829858d2389697b1d6cfc6a903873fd74b7e47a6b1045584" +checksum = "b73ad1395eda136baec5ece7e079e0536a82ef73488e345456cc9b89858ad0ec" dependencies = [ "cfg-if", "libc", diff --git a/compiler/rustc_codegen_cranelift/Cargo.toml b/compiler/rustc_codegen_cranelift/Cargo.toml index 30db10f74..20fcd2227 100644 --- a/compiler/rustc_codegen_cranelift/Cargo.toml +++ b/compiler/rustc_codegen_cranelift/Cargo.toml @@ -8,12 +8,12 @@ crate-type = ["dylib"] [dependencies] # These have to be in sync with each other -cranelift-codegen = { version = "0.101.2", default-features = false, features = ["std", "unwind", "all-arch"] } -cranelift-frontend = { version = "0.101.2" } -cranelift-module = { version = "0.101.2" } -cranelift-native = { version = "0.101.2" } -cranelift-jit = { version = "0.101.2", optional = true } -cranelift-object = { version = "0.101.2" } +cranelift-codegen = { version = "0.102", default-features = false, features = ["std", "unwind", "all-arch"] } +cranelift-frontend = { version = "0.102" } +cranelift-module = { version = "0.102" } +cranelift-native = { version = "0.102" } +cranelift-jit = { version = "0.102", optional = true } +cranelift-object = { version = "0.102" } target-lexicon = "0.12.0" gimli = { version = "0.28", default-features = false, features = ["write"]} object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] } diff --git a/compiler/rustc_codegen_cranelift/Readme.md b/compiler/rustc_codegen_cranelift/Readme.md index 1a2b2bbc5..ca6ecdf1d 100644 --- a/compiler/rustc_codegen_cranelift/Readme.md +++ b/compiler/rustc_codegen_cranelift/Readme.md @@ -5,8 +5,48 @@ This has the potential to improve compilation times in debug mode. If your project doesn't use any of the things listed under "Not yet supported", it should work fine. If not please open an issue. +## Download using Rustup + +The Cranelift codegen backend is distributed in nightly builds on Linux and x86_64 macOS. If you want to +install it using Rustup, you can do that by running: + +```bash +$ rustup component add rustc-codegen-cranelift-preview --toolchain nightly +``` + +Once it is installed, you can enable it with one of the following approaches: +- `CARGO_PROFILE_DEV_CODEGEN_BACKEND=cranelift cargo +nightly build -Zcodegen-backend` +- `RUSTFLAGS="-Zcodegen-backend=cranelift" cargo +nightly build` +- Add the following to `.cargo/config.toml`: + ```toml + [unstable] + codegen-backend = true + + [profile.dev] + codegen-backend = "cranelift" + ``` +- Add the following to `Cargo.toml`: + ```toml + # This line needs to come before anything else in Cargo.toml + cargo-features = ["codegen-backend"] + + [profile.dev] + codegen-backend = "cranelift" + ``` + +## Precompiled builds + +You can also download a pre-built version from the [releases] page. +Extract the `dist` directory in the archive anywhere you want. +If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`. +(tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)). + +[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev + ## Building and testing +If you want to build the backend manually, you can download it from GitHub and build it yourself: + ```bash $ git clone https://github.com/rust-lang/rustc_codegen_cranelift $ cd rustc_codegen_cranelift @@ -22,15 +62,6 @@ $ ./test.sh For more docs on how to build and test see [build_system/usage.txt](build_system/usage.txt) or the help message of `./y.sh`. -## Precompiled builds - -Alternatively you can download a pre built version from the [releases] page. -Extract the `dist` directory in the archive anywhere you want. -If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`. -(tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)). - -[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev - ## Usage rustc_codegen_cranelift can be used as a near-drop-in replacement for `cargo build` or `cargo run` for existing projects. diff --git a/compiler/rustc_codegen_cranelift/build_system/tests.rs b/compiler/rustc_codegen_cranelift/build_system/tests.rs index 10736ff9a..cb7b2454c 100644 --- a/compiler/rustc_codegen_cranelift/build_system/tests.rs +++ b/compiler/rustc_codegen_cranelift/build_system/tests.rs @@ -75,11 +75,6 @@ const BASE_SYSROOT_SUITE: &[TestCase] = &[ "example/arbitrary_self_types_pointers_and_wrappers.rs", &[], ), - TestCase::build_bin_and_run( - "aot.issue_91827_extern_types", - "example/issue-91827-extern-types.rs", - &[], - ), TestCase::build_lib("build.alloc_system", "example/alloc_system.rs", "lib"), TestCase::build_bin_and_run("aot.alloc_example", "example/alloc_example.rs", &[]), TestCase::jit_bin("jit.std_example", "example/std_example.rs", ""), @@ -99,7 +94,20 @@ const BASE_SYSROOT_SUITE: &[TestCase] = &[ TestCase::build_bin_and_run("aot.mod_bench", "example/mod_bench.rs", &[]), TestCase::build_bin_and_run("aot.issue-72793", "example/issue-72793.rs", &[]), TestCase::build_bin("aot.issue-59326", "example/issue-59326.rs"), + TestCase::custom("aot.polymorphize_coroutine", &|runner| { + runner.run_rustc(&["example/polymorphize_coroutine.rs", "-Zpolymorphize"]); + runner.run_out_command("polymorphize_coroutine", &[]); + }), TestCase::build_bin_and_run("aot.neon", "example/neon.rs", &[]), + TestCase::custom("aot.gen_block_iterate", &|runner| { + runner.run_rustc([ + "example/gen_block_iterate.rs", + "--edition", + "2024", + "-Zunstable-options", + ]); + runner.run_out_command("gen_block_iterate", &[]); + }), ]; pub(crate) static RAND_REPO: GitRepo = GitRepo::github( @@ -224,6 +232,13 @@ const EXTENDED_SYSROOT_SUITE: &[TestCase] = &[ if runner.is_native { let mut test_cmd = PORTABLE_SIMD.test(&runner.target_compiler, &runner.dirs); test_cmd.arg("-q"); + // FIXME remove after portable-simd update + test_cmd + .arg("--") + .arg("--skip") + .arg("core_simd::swizzle::simd_swizzle") + .arg("--skip") + .arg("core_simd::vector::Simd<T,N>::lanes"); spawn_and_wait(test_cmd); } }), @@ -457,6 +472,9 @@ impl<'a> TestRunner<'a> { cmd.arg("--target"); cmd.arg(&self.target_compiler.triple); cmd.arg("-Cpanic=abort"); + cmd.arg("-Zunstable-options"); + cmd.arg("--check-cfg=cfg(no_unstable_features)"); + cmd.arg("--check-cfg=cfg(jit)"); cmd.args(args); cmd } diff --git a/compiler/rustc_codegen_cranelift/config.txt b/compiler/rustc_codegen_cranelift/config.txt index 2ccdc7d78..0b7cac188 100644 --- a/compiler/rustc_codegen_cranelift/config.txt +++ b/compiler/rustc_codegen_cranelift/config.txt @@ -42,7 +42,9 @@ aot.float-minmax-pass aot.mod_bench aot.issue-72793 aot.issue-59326 +aot.polymorphize_coroutine aot.neon +aot.gen_block_iterate testsuite.extended_sysroot test.rust-random/rand diff --git a/compiler/rustc_codegen_cranelift/example/gen_block_iterate.rs b/compiler/rustc_codegen_cranelift/example/gen_block_iterate.rs new file mode 100644 index 000000000..25bfe542d --- /dev/null +++ b/compiler/rustc_codegen_cranelift/example/gen_block_iterate.rs @@ -0,0 +1,36 @@ +// Copied from https://github.com/rust-lang/rust/blob/46455dc65069387f2dc46612f13fd45452ab301a/tests/ui/coroutine/gen_block_iterate.rs +// revisions: next old +//compile-flags: --edition 2024 -Zunstable-options +//[next] compile-flags: -Znext-solver +// run-pass +#![feature(gen_blocks)] + +fn foo() -> impl Iterator<Item = u32> { + gen { yield 42; for x in 3..6 { yield x } } +} + +fn moved() -> impl Iterator<Item = u32> { + let mut x = "foo".to_string(); + gen move { + yield 42; + if x == "foo" { return } + x.clear(); + for x in 3..6 { yield x } + } +} + +fn main() { + let mut iter = foo(); + assert_eq!(iter.next(), Some(42)); + assert_eq!(iter.next(), Some(3)); + assert_eq!(iter.next(), Some(4)); + assert_eq!(iter.next(), Some(5)); + assert_eq!(iter.next(), None); + // `gen` blocks are fused + assert_eq!(iter.next(), None); + + let mut iter = moved(); + assert_eq!(iter.next(), Some(42)); + assert_eq!(iter.next(), None); + +} diff --git a/compiler/rustc_codegen_cranelift/example/issue-91827-extern-types.rs b/compiler/rustc_codegen_cranelift/example/issue-91827-extern-types.rs deleted file mode 100644 index 6f39c5edc..000000000 --- a/compiler/rustc_codegen_cranelift/example/issue-91827-extern-types.rs +++ /dev/null @@ -1,55 +0,0 @@ -// Copied from rustc ui test suite - -// run-pass -// -// Test that we can handle unsized types with an extern type tail part. -// Regression test for issue #91827. - -#![feature(extern_types)] - -use std::ptr::addr_of; - -extern "C" { - type Opaque; -} - -unsafe impl Sync for Opaque {} - -#[repr(C)] -pub struct List<T> { - len: usize, - data: [T; 0], - tail: Opaque, -} - -#[repr(C)] -pub struct ListImpl<T, const N: usize> { - len: usize, - data: [T; N], -} - -impl<T> List<T> { - const fn as_slice(&self) -> &[T] { - unsafe { std::slice::from_raw_parts(self.data.as_ptr(), self.len) } - } -} - -impl<T, const N: usize> ListImpl<T, N> { - const fn as_list(&self) -> &List<T> { - unsafe { std::mem::transmute(self) } - } -} - -pub static A: ListImpl<u128, 3> = ListImpl { len: 3, data: [5, 6, 7] }; -pub static A_REF: &'static List<u128> = A.as_list(); -pub static A_TAIL_OFFSET: isize = tail_offset(A.as_list()); - -const fn tail_offset<T>(list: &List<T>) -> isize { - unsafe { (addr_of!(list.tail) as *const u8).offset_from(list as *const List<T> as *const u8) } -} - -fn main() { - assert_eq!(A_REF.as_slice(), &[5, 6, 7]); - // Check that interpreter and code generation agree about the position of the tail field. - assert_eq!(A_TAIL_OFFSET, tail_offset(A_REF)); -} diff --git a/compiler/rustc_codegen_cranelift/example/mini_core.rs b/compiler/rustc_codegen_cranelift/example/mini_core.rs index 934e4b178..3607b7cd9 100644 --- a/compiler/rustc_codegen_cranelift/example/mini_core.rs +++ b/compiler/rustc_codegen_cranelift/example/mini_core.rs @@ -11,7 +11,7 @@ thread_local )] #![no_core] -#![allow(dead_code, internal_features)] +#![allow(dead_code, internal_features, ambiguous_wide_pointer_comparisons)] #[lang = "sized"] pub trait Sized {} diff --git a/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs b/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs index afc51a47f..1d51b499c 100644 --- a/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs +++ b/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs @@ -337,17 +337,6 @@ fn main() { static REF2: &u8 = REF1; assert_eq!(*REF1, *REF2); - extern "C" { - type A; - } - - fn main() { - let x: &A = unsafe { &*(1usize as *const A) }; - - assert_eq!(unsafe { intrinsics::size_of_val(x) }, 0); - assert_eq!(unsafe { intrinsics::min_align_of_val(x) }, 1); - } - #[repr(simd)] struct V([f64; 2]); diff --git a/compiler/rustc_codegen_cranelift/example/polymorphize_coroutine.rs b/compiler/rustc_codegen_cranelift/example/polymorphize_coroutine.rs new file mode 100644 index 000000000..c965b34e1 --- /dev/null +++ b/compiler/rustc_codegen_cranelift/example/polymorphize_coroutine.rs @@ -0,0 +1,16 @@ +#![feature(coroutines, coroutine_trait)] + +use std::ops::Coroutine; +use std::pin::Pin; + +fn main() { + run_coroutine::<i32>(); +} + +fn run_coroutine<T>() { + let mut coroutine = || { + yield; + return; + }; + Pin::new(&mut coroutine).resume(()); +} diff --git a/compiler/rustc_codegen_cranelift/patches/0001-portable-simd-Enable-the-exposed_provenance-feature.patch b/compiler/rustc_codegen_cranelift/patches/0001-portable-simd-Enable-the-exposed_provenance-feature.patch new file mode 100644 index 000000000..b8c0783f5 --- /dev/null +++ b/compiler/rustc_codegen_cranelift/patches/0001-portable-simd-Enable-the-exposed_provenance-feature.patch @@ -0,0 +1,22 @@ +From a101a43b795431ce617e7782afb451f4853afc00 Mon Sep 17 00:00:00 2001 +From: bjorn3 <17426603+bjorn3@users.noreply.github.com> +Date: Thu, 7 Dec 2023 14:51:35 +0000 +Subject: [PATCH] Enable the exposed_provenance feature + +--- + crates/core_simd/tests/pointers.rs | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/crates/core_simd/tests/pointers.rs b/crates/core_simd/tests/pointers.rs +index 0ae8f83..06620d6 100644 +--- a/crates/core_simd/tests/pointers.rs ++++ b/crates/core_simd/tests/pointers.rs +@@ -1,4 +1,4 @@ +-#![feature(portable_simd, strict_provenance)] ++#![feature(exposed_provenance, portable_simd, strict_provenance)] + + use core_simd::simd::{Simd, SimdConstPtr, SimdMutPtr}; + +-- +2.34.1 + diff --git a/compiler/rustc_codegen_cranelift/patches/stdlib-lock.toml b/compiler/rustc_codegen_cranelift/patches/stdlib-lock.toml index 8a690bada..8e213f71c 100644 --- a/compiler/rustc_codegen_cranelift/patches/stdlib-lock.toml +++ b/compiler/rustc_codegen_cranelift/patches/stdlib-lock.toml @@ -36,15 +36,18 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] [[package]] name = "cfg-if" @@ -58,9 +61,9 @@ dependencies = [ [[package]] name = "compiler_builtins" -version = "0.1.103" +version = "0.1.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3b73c3443a5fd2438d7ba4853c64e4c8efc2404a9e28a9234cc2d5eebc6c242" +checksum = "99c3f9035afc33f4358773239573f7d121099856753e1bbd2a6a5207098fc741" dependencies = [ "cc", "rustc-std-workspace-core", @@ -124,9 +127,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" dependencies = [ "compiler_builtins", "rustc-std-workspace-alloc", @@ -135,9 +138,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.0" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "allocator-api2", "compiler_builtins", @@ -147,9 +150,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" dependencies = [ "compiler_builtins", "rustc-std-workspace-alloc", @@ -167,9 +170,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" dependencies = [ "compiler_builtins", "rustc-std-workspace-core", @@ -189,9 +192,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "compiler_builtins", "memchr", @@ -241,9 +244,9 @@ dependencies = [ [[package]] name = "r-efi" -version = "4.2.0" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "575fc2d9b3da54adbdfaddf6eca48fec256d977c8630a1750b8991347d1ac911" +checksum = "0e244f96e03a3067f9e521d3167bd42657594cb8588c8d3a2db01545dc1af2e0" dependencies = [ "compiler_builtins", "rustc-std-workspace-core", @@ -402,9 +405,9 @@ dependencies = [ [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" dependencies = [ "compiler_builtins", "rustc-std-workspace-core", @@ -419,6 +422,18 @@ dependencies = [ "compiler_builtins", "core", "libc", + "unwinding", +] + +[[package]] +name = "unwinding" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37a19a21a537f635c16c7576f22d0f2f7d63353c1337ad4ce0d8001c7952a25b" +dependencies = [ + "compiler_builtins", + "gimli", + "rustc-std-workspace-core", ] [[package]] diff --git a/compiler/rustc_codegen_cranelift/rust-toolchain b/compiler/rustc_codegen_cranelift/rust-toolchain index b832b06e0..4ba08f1af 100644 --- a/compiler/rustc_codegen_cranelift/rust-toolchain +++ b/compiler/rustc_codegen_cranelift/rust-toolchain @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2023-11-10" +channel = "nightly-2023-12-19" components = ["rust-src", "rustc-dev", "llvm-tools"] diff --git a/compiler/rustc_codegen_cranelift/rustfmt.toml b/compiler/rustc_codegen_cranelift/rustfmt.toml index ebeca8662..0f884187a 100644 --- a/compiler/rustc_codegen_cranelift/rustfmt.toml +++ b/compiler/rustc_codegen_cranelift/rustfmt.toml @@ -1,4 +1,7 @@ -ignore = ["y.rs"] +ignore = [ + "y.rs", + "example/gen_block_iterate.rs", # uses edition 2024 +] # Matches rustfmt.toml of rustc version = "Two" diff --git a/compiler/rustc_codegen_cranelift/scripts/rustc-clif.rs b/compiler/rustc_codegen_cranelift/scripts/rustc-clif.rs index 33d51bddd..550f20515 100644 --- a/compiler/rustc_codegen_cranelift/scripts/rustc-clif.rs +++ b/compiler/rustc_codegen_cranelift/scripts/rustc-clif.rs @@ -27,7 +27,7 @@ fn main() { args.push(codegen_backend_arg); } if !passed_args.iter().any(|arg| { - arg == "--sysroot" || arg.to_str().map(|s| s.starts_with("--sysroot=")) == Some(true) + arg == "--sysroot" || arg.to_str().is_some_and(|s| s.starts_with("--sysroot=")) }) { args.push(OsString::from("--sysroot")); args.push(OsString::from(sysroot.to_str().unwrap())); diff --git a/compiler/rustc_codegen_cranelift/scripts/rustdoc-clif.rs b/compiler/rustc_codegen_cranelift/scripts/rustdoc-clif.rs index 10582cc7b..f7d1bdbc4 100644 --- a/compiler/rustc_codegen_cranelift/scripts/rustdoc-clif.rs +++ b/compiler/rustc_codegen_cranelift/scripts/rustdoc-clif.rs @@ -27,7 +27,7 @@ fn main() { args.push(codegen_backend_arg); } if !passed_args.iter().any(|arg| { - arg == "--sysroot" || arg.to_str().map(|s| s.starts_with("--sysroot=")) == Some(true) + arg == "--sysroot" || arg.to_str().is_some_and(|s| s.starts_with("--sysroot=")) }) { args.push(OsString::from("--sysroot")); args.push(OsString::from(sysroot.to_str().unwrap())); diff --git a/compiler/rustc_codegen_cranelift/scripts/rustup.sh b/compiler/rustc_codegen_cranelift/scripts/rustup.sh index e62788f2e..355282911 100755 --- a/compiler/rustc_codegen_cranelift/scripts/rustup.sh +++ b/compiler/rustc_codegen_cranelift/scripts/rustup.sh @@ -46,7 +46,7 @@ case $1 in git pull origin master branch=sync_cg_clif-$(date +%Y-%m-%d) git checkout -b "$branch" - "$cg_clif/git-fixed-subtree.sh" pull --prefix=compiler/rustc_codegen_cranelift/ https://github.com/bjorn3/rustc_codegen_cranelift.git master + "$cg_clif/git-fixed-subtree.sh" pull --prefix=compiler/rustc_codegen_cranelift/ https://github.com/rust-lang/rustc_codegen_cranelift.git master git push -u my "$branch" # immediately merge the merge commit into cg_clif to prevent merge conflicts when syncing diff --git a/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh b/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh index bbb8a010d..731828caa 100644 --- a/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh +++ b/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh @@ -1,15 +1,17 @@ #!/usr/bin/env bash set -e +# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we +# the LLVM backend isn't compiled in here. +export CG_CLIF_FORCE_GNU_AS=1 + # Compiletest expects all standard library paths to start with /rustc/FAKE_PREFIX. # CG_CLIF_STDLIB_REMAP_PATH_PREFIX will cause cg_clif's build system to pass # --remap-path-prefix to handle this. -# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we -# the LLVM backend isn't compiled in here. -CG_CLIF_FORCE_GNU_AS=1 CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build +CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build echo "[SETUP] Rust fork" -git clone https://github.com/rust-lang/rust.git || true +git clone https://github.com/rust-lang/rust.git --filter=tree:0 || true pushd rust git fetch git checkout -- . diff --git a/compiler/rustc_codegen_cranelift/scripts/test_bootstrap.sh b/compiler/rustc_codegen_cranelift/scripts/test_bootstrap.sh index a8f6d7a20..791d45799 100755 --- a/compiler/rustc_codegen_cranelift/scripts/test_bootstrap.sh +++ b/compiler/rustc_codegen_cranelift/scripts/test_bootstrap.sh @@ -11,7 +11,5 @@ rm -r compiler/rustc_codegen_cranelift/{Cargo.*,src} cp ../Cargo.* compiler/rustc_codegen_cranelift/ cp -r ../src compiler/rustc_codegen_cranelift/src -# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we -# the LLVM backend isn't compiled in here. -CG_CLIF_FORCE_GNU_AS=1 ./x.py build --stage 1 library/std +./x.py build --stage 1 library/std popd diff --git a/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh b/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh index cdc78adcf..7d7ffdadc 100755 --- a/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh +++ b/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh @@ -44,6 +44,7 @@ rm tests/ui/proc-macro/no-mangle-in-proc-macro-issue-111888.rs # vendor intrinsics rm tests/ui/sse2.rs # CodegenBackend::target_features not yet implemented rm tests/ui/simd/array-type.rs # "Index argument for `simd_insert` is not a constant" +rm tests/ui/simd/masked-load-store.rs # exotic linkages rm tests/ui/issues/issue-33992.rs # unsupported linkages @@ -146,11 +147,6 @@ rm tests/ui/process/nofile-limit.rs # TODO some AArch64 linking issue rm tests/ui/stdio-is-blocking.rs # really slow with unoptimized libstd -# rustc bugs -# ========== -# https://github.com/rust-lang/rust/pull/116447#issuecomment-1790451463 -rm tests/ui/coroutine/gen_block_*.rs - cp ../dist/bin/rustdoc-clif ../dist/bin/rustdoc # some tests expect bin/rustdoc to exist # prevent $(RUSTDOC) from picking up the sysroot built by x.py. It conflicts with the one used by diff --git a/compiler/rustc_codegen_cranelift/src/abi/comments.rs b/compiler/rustc_codegen_cranelift/src/abi/comments.rs index ade6968de..a318cae17 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/comments.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/comments.rs @@ -3,7 +3,6 @@ use std::borrow::Cow; -use rustc_middle::mir; use rustc_target::abi::call::PassMode; use crate::prelude::*; diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs index c4572e035..2c194f6d6 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs @@ -6,7 +6,7 @@ mod returning; use std::borrow::Cow; -use cranelift_codegen::ir::{AbiParam, SigRef}; +use cranelift_codegen::ir::SigRef; use cranelift_module::ModuleError; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::ty::layout::FnAbiOf; @@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>( args, ret_place, target, + source_info.span, ); return; } diff --git a/compiler/rustc_codegen_cranelift/src/analyze.rs b/compiler/rustc_codegen_cranelift/src/analyze.rs index 321612238..c5762638a 100644 --- a/compiler/rustc_codegen_cranelift/src/analyze.rs +++ b/compiler/rustc_codegen_cranelift/src/analyze.rs @@ -2,7 +2,6 @@ use rustc_index::IndexVec; use rustc_middle::mir::StatementKind::*; -use rustc_middle::ty::Ty; use crate::prelude::*; diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs index 91b1547cb..df40a5eb4 100644 --- a/compiler/rustc_codegen_cranelift/src/base.rs +++ b/compiler/rustc_codegen_cranelift/src/base.rs @@ -176,10 +176,10 @@ pub(crate) fn compile_fn( match module.define_function(codegened_func.func_id, context) { Ok(()) => {} Err(ModuleError::Compilation(CodegenError::ImplLimitExceeded)) => { - let handler = rustc_session::EarlyErrorHandler::new( + let early_dcx = rustc_session::EarlyDiagCtxt::new( rustc_session::config::ErrorOutputType::default(), ); - handler.early_error(format!( + early_dcx.early_error(format!( "backend implementation limit exceeded while compiling {name}", name = codegened_func.symbol_name )); @@ -353,7 +353,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { fx, rustc_hir::LangItem::PanicBoundsCheck, &[index, len, location], - source_info.span, + Some(source_info.span), ); } AssertKind::MisalignedPointerDereference { ref required, ref found } => { @@ -365,7 +365,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { fx, rustc_hir::LangItem::PanicMisalignedPointerDereference, &[required, found, location], - source_info.span, + Some(source_info.span), ); } _ => { @@ -456,7 +456,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { ); } - crate::inline_asm::codegen_inline_asm( + crate::inline_asm::codegen_inline_asm_terminator( fx, source_info.span, template, @@ -945,19 +945,19 @@ pub(crate) fn codegen_panic<'tcx>( let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap()); let args = [msg_ptr, msg_len, location]; - codegen_panic_inner(fx, rustc_hir::LangItem::Panic, &args, source_info.span); + codegen_panic_inner(fx, rustc_hir::LangItem::Panic, &args, Some(source_info.span)); } pub(crate) fn codegen_panic_nounwind<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, msg_str: &str, - source_info: mir::SourceInfo, + span: Option<Span>, ) { let msg_ptr = fx.anonymous_str(msg_str); let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap()); let args = [msg_ptr, msg_len]; - codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, source_info.span); + codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, span); } pub(crate) fn codegen_unwind_terminate<'tcx>( @@ -967,16 +967,16 @@ pub(crate) fn codegen_unwind_terminate<'tcx>( ) { let args = []; - codegen_panic_inner(fx, reason.lang_item(), &args, source_info.span); + codegen_panic_inner(fx, reason.lang_item(), &args, Some(source_info.span)); } fn codegen_panic_inner<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, lang_item: rustc_hir::LangItem, args: &[Value], - span: Span, + span: Option<Span>, ) { - let def_id = fx.tcx.require_lang_item(lang_item, Some(span)); + let def_id = fx.tcx.require_lang_item(lang_item, span); let instance = Instance::mono(fx.tcx, def_id).polymorphize(fx.tcx); let symbol_name = fx.tcx.symbol_name(instance).name; diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs index 63562d335..bd19a7ed0 100644 --- a/compiler/rustc_codegen_cranelift/src/common.rs +++ b/compiler/rustc_codegen_cranelift/src/common.rs @@ -98,11 +98,15 @@ fn clif_pair_type_from_ty<'tcx>( /// Is a pointer to this type a fat ptr? pub(crate) fn has_ptr_meta<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> bool { - let ptr_ty = Ty::new_ptr(tcx, TypeAndMut { ty, mutbl: rustc_hir::Mutability::Not }); - match &tcx.layout_of(ParamEnv::reveal_all().and(ptr_ty)).unwrap().abi { - Abi::Scalar(_) => false, - Abi::ScalarPair(_, _) => true, - abi => unreachable!("Abi of ptr to {:?} is {:?}???", ty, abi), + if ty.is_sized(tcx, ParamEnv::reveal_all()) { + return false; + } + + let tail = tcx.struct_tail_erasing_lifetimes(ty, ParamEnv::reveal_all()); + match tail.kind() { + ty::Foreign(..) => false, + ty::Str | ty::Slice(..) | ty::Dynamic(..) => true, + _ => bug!("unexpected unsized tail: {:?}", tail), } } diff --git a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs index 20f2ee4c7..967896913 100644 --- a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs +++ b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs @@ -46,7 +46,7 @@ impl ConcurrencyLimiter { } } - pub(super) fn acquire(&mut self, handler: &rustc_errors::Handler) -> ConcurrencyLimiterToken { + pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken { let mut state = self.state.lock().unwrap(); loop { state.assert_invariants(); @@ -64,7 +64,7 @@ impl ConcurrencyLimiter { // Make sure to drop the mutex guard first to prevent poisoning the mutex. drop(state); if let Some(err) = err { - handler.fatal(err).raise(); + dcx.fatal(err); } else { // The error was already emitted, but compilation continued. Raise a silent // fatal error. diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs index b0853d30e..9ffa006e5 100644 --- a/compiler/rustc_codegen_cranelift/src/constant.rs +++ b/compiler/rustc_codegen_cranelift/src/constant.rs @@ -1,10 +1,12 @@ //! Handling of `static`s, `const`s and promoted allocations +use std::cmp::Ordering; + use cranelift_module::*; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::fx::FxHashSet; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar}; -use rustc_middle::mir::ConstValue; +use rustc_middle::ty::ScalarInt; use crate::prelude::*; @@ -123,7 +125,8 @@ pub(crate) fn codegen_const_value<'tcx>( } } Scalar::Ptr(ptr, _size) => { - let (alloc_id, offset) = ptr.into_parts(); // we know the `offset` is relative + let (prov, offset) = ptr.into_parts(); // we know the `offset` is relative + let alloc_id = prov.alloc_id(); let base_addr = match fx.tcx.global_alloc(alloc_id) { GlobalAlloc::Memory(alloc) => { let data_id = data_id_for_alloc_id( @@ -371,7 +374,8 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec(); data.define(bytes.into_boxed_slice()); - for &(offset, alloc_id) in alloc.provenance().ptrs().iter() { + for &(offset, prov) in alloc.provenance().ptrs().iter() { + let alloc_id = prov.alloc_id(); let addend = { let endianness = tcx.data_layout.endian; let offset = offset.bytes() as usize; @@ -430,9 +434,9 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant pub(crate) fn mir_operand_get_const_val<'tcx>( fx: &FunctionCx<'_, '_, 'tcx>, operand: &Operand<'tcx>, -) -> Option<ConstValue<'tcx>> { +) -> Option<ScalarInt> { match operand { - Operand::Constant(const_) => Some(eval_mir_constant(fx, const_).0), + Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(), // FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored // inside a temporary before being passed to the intrinsic requiring the const argument. // This code tries to find a single constant defining definition of the referenced local. @@ -440,7 +444,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>( if !place.projection.is_empty() { return None; } - let mut computed_const_val = None; + let mut computed_scalar_int = None; for bb_data in fx.mir.basic_blocks.iter() { for stmt in &bb_data.statements { match &stmt.kind { @@ -456,22 +460,38 @@ pub(crate) fn mir_operand_get_const_val<'tcx>( operand, ty, ) => { - if computed_const_val.is_some() { + if computed_scalar_int.is_some() { return None; // local assigned twice } if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) { return None; } - let const_val = mir_operand_get_const_val(fx, operand)?; - if fx.layout_of(*ty).size - != const_val.try_to_scalar_int()?.size() + let scalar_int = mir_operand_get_const_val(fx, operand)?; + let scalar_int = match fx + .layout_of(*ty) + .size + .cmp(&scalar_int.size()) { - return None; - } - computed_const_val = Some(const_val); + Ordering::Equal => scalar_int, + Ordering::Less => match ty.kind() { + ty::Uint(_) => ScalarInt::try_from_uint( + scalar_int.try_to_uint(scalar_int.size()).unwrap(), + fx.layout_of(*ty).size, + ) + .unwrap(), + ty::Int(_) => ScalarInt::try_from_int( + scalar_int.try_to_int(scalar_int.size()).unwrap(), + fx.layout_of(*ty).size, + ) + .unwrap(), + _ => unreachable!(), + }, + Ordering::Greater => return None, + }; + computed_scalar_int = Some(scalar_int); } Rvalue::Use(operand) => { - computed_const_val = mir_operand_get_const_val(fx, operand) + computed_scalar_int = mir_operand_get_const_val(fx, operand) } _ => return None, } @@ -522,7 +542,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>( TerminatorKind::Call { .. } => {} } } - computed_const_val + computed_scalar_int } } } diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs index 11229dd42..b3ab533df 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs @@ -422,7 +422,7 @@ pub(crate) fn run_aot( backend_config.clone(), global_asm_config.clone(), cgu.name(), - concurrency_limiter.acquire(tcx.sess.diagnostic()), + concurrency_limiter.acquire(tcx.sess.dcx()), ), module_codegen, Some(rustc_middle::dep_graph::hash_result), diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs index ce0eecca8..73f4bc7c1 100644 --- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs +++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs @@ -3,14 +3,13 @@ use std::fmt::Write; use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece}; -use rustc_middle::mir::InlineAsmOperand; use rustc_span::sym; use rustc_target::asm::*; use target_lexicon::BinaryFormat; use crate::prelude::*; -enum CInlineAsmOperand<'tcx> { +pub(crate) enum CInlineAsmOperand<'tcx> { In { reg: InlineAsmRegOrRegClass, value: Value, @@ -34,7 +33,7 @@ enum CInlineAsmOperand<'tcx> { }, } -pub(crate) fn codegen_inline_asm<'tcx>( +pub(crate) fn codegen_inline_asm_terminator<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, span: Span, template: &[InlineAsmTemplatePiece], @@ -42,8 +41,6 @@ pub(crate) fn codegen_inline_asm<'tcx>( options: InlineAsmOptions, destination: Option<mir::BasicBlock>, ) { - // FIXME add .eh_frame unwind info directives - // Used by panic_abort on Windows, but uses a syntax which only happens to work with // asm!() by accident and breaks with the GNU assembler as well as global_asm!() for // the LLVM backend. @@ -135,15 +132,33 @@ pub(crate) fn codegen_inline_asm<'tcx>( }) .collect::<Vec<_>>(); - let mut inputs = Vec::new(); - let mut outputs = Vec::new(); + codegen_inline_asm_inner(fx, template, &operands, options); + + match destination { + Some(destination) => { + let destination_block = fx.get_block(destination); + fx.bcx.ins().jump(destination_block, &[]); + } + None => { + fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); + } + } +} + +pub(crate) fn codegen_inline_asm_inner<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + template: &[InlineAsmTemplatePiece], + operands: &[CInlineAsmOperand<'tcx>], + options: InlineAsmOptions, +) { + // FIXME add .eh_frame unwind info directives let mut asm_gen = InlineAssemblyGenerator { tcx: fx.tcx, arch: fx.tcx.sess.asm_arch.unwrap(), enclosing_def_id: fx.instance.def_id(), template, - operands: &operands, + operands, options, registers: Vec::new(), stack_slots_clobber: Vec::new(), @@ -165,6 +180,8 @@ pub(crate) fn codegen_inline_asm<'tcx>( let generated_asm = asm_gen.generate_asm_wrapper(&asm_name); fx.cx.global_asm.push_str(&generated_asm); + let mut inputs = Vec::new(); + let mut outputs = Vec::new(); for (i, operand) in operands.iter().enumerate() { match operand { CInlineAsmOperand::In { reg: _, value } => { @@ -186,16 +203,6 @@ pub(crate) fn codegen_inline_asm<'tcx>( } call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs); - - match destination { - Some(destination) => { - let destination_block = fx.get_block(destination); - fx.bcx.ins().jump(destination_block, &[]); - } - None => { - fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); - } - } } struct InlineAssemblyGenerator<'a, 'tcx> { @@ -637,8 +644,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { ) { match arch { InlineAsmArch::X86_64 => { - write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap(); - reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); + match reg { + InlineAsmReg::X86(reg) + if reg as u32 >= X86InlineAsmReg::xmm0 as u32 + && reg as u32 <= X86InlineAsmReg::xmm15 as u32 => + { + // rustc emits x0 rather than xmm0 + write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap(); + write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32) + .unwrap(); + } + _ => { + write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap(); + reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); + } + } generated_asm.push('\n'); } InlineAsmArch::AArch64 => { @@ -663,8 +683,24 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> { ) { match arch { InlineAsmArch::X86_64 => { - generated_asm.push_str(" mov "); - reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap(); + match reg { + InlineAsmReg::X86(reg) + if reg as u32 >= X86InlineAsmReg::xmm0 as u32 + && reg as u32 <= X86InlineAsmReg::xmm15 as u32 => + { + // rustc emits x0 rather than xmm0 + write!( + generated_asm, + " movups xmm{}", + reg as u32 - X86InlineAsmReg::xmm0 as u32 + ) + .unwrap(); + } + _ => { + generated_asm.push_str(" mov "); + reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap() + } + } writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap(); } InlineAsmArch::AArch64 => { @@ -720,7 +756,12 @@ fn call_inline_asm<'tcx>( fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]); for (offset, place) in outputs { - let ty = fx.clif_type(place.layout().ty).unwrap(); + let ty = if place.layout().ty.is_simd() { + let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx); + fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap() + } else { + fx.clif_type(place.layout().ty).unwrap() + }; let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load( fx, ty, @@ -729,83 +770,3 @@ fn call_inline_asm<'tcx>( place.write_cvalue(fx, CValue::by_val(value, place.layout())); } } - -pub(crate) fn codegen_xgetbv<'tcx>( - fx: &mut FunctionCx<'_, '_, 'tcx>, - xcr_no: Value, - ret: CPlace<'tcx>, -) { - // FIXME add .eh_frame unwind info directives - - let operands = vec![ - CInlineAsmOperand::In { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), - value: xcr_no, - }, - CInlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), - late: true, - place: Some(ret), - }, - CInlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), - late: true, - place: None, - }, - ]; - let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM; - - let mut inputs = Vec::new(); - let mut outputs = Vec::new(); - - let mut asm_gen = InlineAssemblyGenerator { - tcx: fx.tcx, - arch: fx.tcx.sess.asm_arch.unwrap(), - enclosing_def_id: fx.instance.def_id(), - template: &[InlineAsmTemplatePiece::String( - " - xgetbv - // out = rdx << 32 | rax - shl rdx, 32 - or rax, rdx - " - .to_string(), - )], - operands: &operands, - options, - registers: Vec::new(), - stack_slots_clobber: Vec::new(), - stack_slots_input: Vec::new(), - stack_slots_output: Vec::new(), - stack_slot_size: Size::from_bytes(0), - }; - asm_gen.allocate_registers(); - asm_gen.allocate_stack_slots(); - - let inline_asm_index = fx.cx.inline_asm_index.get(); - fx.cx.inline_asm_index.set(inline_asm_index + 1); - let asm_name = format!( - "__inline_asm_{}_n{}", - fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"), - inline_asm_index - ); - - let generated_asm = asm_gen.generate_asm_wrapper(&asm_name); - fx.cx.global_asm.push_str(&generated_asm); - - for (i, operand) in operands.iter().enumerate() { - match operand { - CInlineAsmOperand::In { reg: _, value } => { - inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value)); - } - CInlineAsmOperand::Out { reg: _, late: _, place } => { - if let Some(place) = place { - outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place)); - } - } - _ => unreachable!(), - } - } - - call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs); -} diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs index e9b7daf14..dbd5db875 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs @@ -1,7 +1,5 @@ //! Emulate LLVM intrinsics -use rustc_middle::ty::GenericArgsRef; - use crate::intrinsics::*; use crate::prelude::*; @@ -12,6 +10,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, target: Option<BasicBlock>, + span: Span, ) { if intrinsic.starts_with("llvm.aarch64") { return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call( @@ -31,6 +30,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( args, ret, target, + span, ); } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs index ee098be1f..e1e514dca 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs @@ -1,7 +1,5 @@ //! Emulate AArch64 LLVM intrinsics -use rustc_middle::ty::GenericArgsRef; - use crate::intrinsics::*; use crate::prelude::*; diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs index 4c5360486..99bb5c4ea 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs @@ -1,7 +1,9 @@ //! Emulate x86 LLVM intrinsics -use rustc_middle::ty::GenericArgsRef; +use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece}; +use rustc_target::asm::*; +use crate::inline_asm::{codegen_inline_asm_inner, CInlineAsmOperand}; use crate::intrinsics::*; use crate::prelude::*; @@ -12,19 +14,53 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( args: &[mir::Operand<'tcx>], ret: CPlace<'tcx>, target: Option<BasicBlock>, + span: Span, ) { match intrinsic { "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => { // Spin loop hint } + "llvm.x86.avx.vzeroupper" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper&ig_expand=7218 + // Do nothing. It is a perf hint anyway. + } + // Used by is_x86_feature_detected!(); "llvm.x86.xgetbv" => { intrinsic_args!(fx, args => (xcr_no); intrinsic); let xcr_no = xcr_no.load_scalar(fx); - crate::inline_asm::codegen_xgetbv(fx, xcr_no, ret); + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String( + " + xgetbv + // out = rdx << 32 | rax + shl rdx, 32 + or rax, rdx + " + .to_string(), + )], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), + value: xcr_no, + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + late: true, + place: Some(ret), + }, + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + late: true, + place: None, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); } "llvm.x86.sse3.ldu.dq" | "llvm.x86.avx.ldu.dq.256" => { @@ -37,6 +73,103 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ret.write_cvalue(fx, val); } + "llvm.x86.avx2.gather.d.d" + | "llvm.x86.avx2.gather.d.q" + | "llvm.x86.avx2.gather.d.ps" + | "llvm.x86.avx2.gather.d.pd" + | "llvm.x86.avx2.gather.d.d.256" + | "llvm.x86.avx2.gather.d.q.256" + | "llvm.x86.avx2.gather.d.ps.256" + | "llvm.x86.avx2.gather.d.pd.256" + | "llvm.x86.avx2.gather.q.d" + | "llvm.x86.avx2.gather.q.q" + | "llvm.x86.avx2.gather.q.ps" + | "llvm.x86.avx2.gather.q.pd" + | "llvm.x86.avx2.gather.q.d.256" + | "llvm.x86.avx2.gather.q.q.256" + | "llvm.x86.avx2.gather.q.ps.256" + | "llvm.x86.avx2.gather.q.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd&ig_expand=3819 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd&ig_expand=3821 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd&ig_expand=3822 + // ... + + intrinsic_args!(fx, args => (src, ptr, index, mask, scale); intrinsic); + + let (src_lane_count, src_lane_ty) = src.layout().ty.simd_size_and_type(fx.tcx); + let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx); + let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(src_lane_ty, ret_lane_ty); + assert!(index_lane_ty.is_integral()); + assert_eq!(src_lane_count, mask_lane_count); + assert_eq!(src_lane_count, ret_lane_count); + + let lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap(); + let index_lane_clif_ty = fx.clif_type(index_lane_ty).unwrap(); + let mask_lane_clif_ty = fx.clif_type(mask_lane_ty).unwrap(); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + let ptr = ptr.load_scalar(fx); + let scale = scale.load_scalar(fx); + let scale = fx.bcx.ins().uextend(types::I64, scale); + for lane_idx in 0..std::cmp::min(src_lane_count, index_lane_count) { + let src_lane = src.value_lane(fx, lane_idx).load_scalar(fx); + let index_lane = index.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = + fx.bcx.ins().bitcast(mask_lane_clif_ty.as_int(), MemFlags::new(), mask_lane); + + let if_enabled = fx.bcx.create_block(); + let if_disabled = fx.bcx.create_block(); + let next = fx.bcx.create_block(); + let res_lane = fx.bcx.append_block_param(next, lane_clif_ty); + + let mask_lane = match mask_lane_clif_ty { + types::I32 | types::F32 => { + fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64) + } + types::I64 | types::F64 => { + fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64) + } + _ => unreachable!(), + }; + fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]); + fx.bcx.seal_block(if_enabled); + fx.bcx.seal_block(if_disabled); + + fx.bcx.switch_to_block(if_enabled); + let index_lane = if index_lane_clif_ty != types::I64 { + fx.bcx.ins().sextend(types::I64, index_lane) + } else { + index_lane + }; + let offset = fx.bcx.ins().imul(index_lane, scale); + let lane_ptr = fx.bcx.ins().iadd(ptr, offset); + let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), lane_ptr, 0); + fx.bcx.ins().jump(next, &[res]); + + fx.bcx.switch_to_block(if_disabled); + fx.bcx.ins().jump(next, &[src_lane]); + + fx.bcx.seal_block(next); + fx.bcx.switch_to_block(next); + + fx.bcx.ins().nop(); + + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout)); + } + + for lane_idx in std::cmp::min(src_lane_count, index_lane_count)..ret_lane_count { + let zero_lane = fx.bcx.ins().iconst(mask_lane_clif_ty.as_int(), 0); + let zero_lane = fx.bcx.ins().bitcast(mask_lane_clif_ty, MemFlags::new(), zero_lane); + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(zero_lane, ret_lane_layout)); + } + } + "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { let (x, y, kind) = match args { [x, y, kind] => (x, y, kind), @@ -241,16 +374,31 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ); } "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => { - let a = match args { - [a] => a, - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); + intrinsic_args!(fx, args => (a); intrinsic); simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { fx.bcx.ins().iabs(lane) }); } + "llvm.x86.sse2.cvttps2dq" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32&ig_expand=2429 + intrinsic_args!(fx, args => (a); intrinsic); + let a = a.load_scalar(fx); + + // Using inline asm instead of fcvt_to_sint_sat as unrepresentable values are turned + // into 0x80000000 for which Cranelift doesn't have a native instruction. + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } "llvm.x86.addcarry.32" | "llvm.x86.addcarry.64" => { intrinsic_args!(fx, args => (c_in, a, b); intrinsic); let c_in = c_in.load_scalar(fx); @@ -332,9 +480,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for out_lane_idx in 0..lane_count / 8 { let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0); - for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 { + for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 { let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx); + let a_lane = fx.bcx.ins().uextend(types::I16, a_lane); let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx); + let b_lane = fx.bcx.ins().uextend(types::I16, b_lane); let lane_diff = fx.bcx.ins().isub(a_lane, b_lane); let abs_lane_diff = fx.bcx.ins().iabs(lane_diff); @@ -405,12 +555,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let ret_lane_layout = fx.layout_of(fx.tcx.types.i32); for out_lane_idx in 0..lane_count / 2 { let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); - let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0); + let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0); let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0); let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); - let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1); + let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1); let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1); @@ -565,14 +715,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( assert_eq!(ret_lane_ty, fx.tcx.types.i16); assert_eq!(lane_count * 2, ret_lane_count); - let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); - let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64); + let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64); let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); for idx in 0..lane_count { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -582,7 +732,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -609,8 +759,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = a.value_lane(fx, idx).load_scalar(fx); - let sat = fx.bcx.ins().umax(lane, min_u16); - let sat = fx.bcx.ins().umin(sat, max_u16); + let sat = fx.bcx.ins().smax(lane, min_u16); + let sat = fx.bcx.ins().smin(sat, max_u16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -619,8 +769,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count { let lane = b.value_lane(fx, idx).load_scalar(fx); - let sat = fx.bcx.ins().umax(lane, min_u16); - let sat = fx.bcx.ins().umin(sat, max_u16); + let sat = fx.bcx.ins().smax(lane, min_u16); + let sat = fx.bcx.ins().smin(sat, max_u16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -641,14 +791,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( assert_eq!(ret_lane_ty, fx.tcx.types.i16); assert_eq!(lane_count * 2, ret_lane_count); - let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16)); - let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16)); + let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64); + let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64); let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); for idx in 0..lane_count / 2 { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -658,7 +808,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -668,7 +818,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = a.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -678,7 +828,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( for idx in 0..lane_count / 2 { let lane = b.value_lane(fx, idx).load_scalar(fx); let sat = fx.bcx.ins().smax(lane, min_i16); - let sat = fx.bcx.ins().umin(sat, max_i16); + let sat = fx.bcx.ins().smin(sat, max_i16); let res = fx.bcx.ins().ireduce(types::I16, sat); let res_lane = CValue::by_val(res, ret_lane_layout); @@ -686,66 +836,489 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( } } - "llvm.x86.pclmulqdq" => { - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772 - intrinsic_args!(fx, args => (a, b, imm8); intrinsic); + "llvm.x86.fma.vfmaddsub.ps" + | "llvm.x86.fma.vfmaddsub.pd" + | "llvm.x86.fma.vfmaddsub.ps.256" + | "llvm.x86.fma.vfmaddsub.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185 + intrinsic_args!(fx, args => (a, b, c); intrinsic); assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); let layout = a.layout(); let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); - assert_eq!(lane_ty, fx.tcx.types.i64); - assert_eq!(ret_lane_ty, fx.tcx.types.i64); - assert_eq!(lane_count, 2); - assert_eq!(ret_lane_count, 2); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); - let imm8 = imm8.load_scalar(fx); + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let res = if idx & 1 == 0 { + fx.bcx.ins().fsub(mul, c_lane) + } else { + fx.bcx.ins().fadd(mul, c_lane) + }; - let control0 = fx.bcx.ins().band_imm(imm8, 0b0000_0001); - let a_lane0 = a.value_lane(fx, 0).load_scalar(fx); - let a_lane1 = a.value_lane(fx, 1).load_scalar(fx); - let temp1 = fx.bcx.ins().select(control0, a_lane1, a_lane0); + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); + } + } - let control4 = fx.bcx.ins().band_imm(imm8, 0b0001_0000); - let b_lane0 = b.value_lane(fx, 0).load_scalar(fx); - let b_lane1 = b.value_lane(fx, 1).load_scalar(fx); - let temp2 = fx.bcx.ins().select(control4, b_lane1, b_lane0); + "llvm.x86.fma.vfmsubadd.ps" + | "llvm.x86.fma.vfmsubadd.pd" + | "llvm.x86.fma.vfmsubadd.ps.256" + | "llvm.x86.fma.vfmsubadd.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305 + intrinsic_args!(fx, args => (a, b, c); intrinsic); - fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value { - let tmp = fx.bcx.ins().ushr_imm(val, bit); - fx.bcx.ins().band_imm(tmp, 1) - } + assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); + let layout = a.layout(); - let mut res1 = fx.bcx.ins().iconst(types::I64, 0); - for i in 0..=63 { - let x = extract_bit(fx, temp1, 0); - let y = extract_bit(fx, temp2, i); - let mut temp = fx.bcx.ins().band(x, y); - for j in 1..=i { - let x = extract_bit(fx, temp1, j); - let y = extract_bit(fx, temp2, i - j); - let z = fx.bcx.ins().band(x, y); - temp = fx.bcx.ins().bxor(temp, z); - } - let temp = fx.bcx.ins().ishl_imm(temp, i); - res1 = fx.bcx.ins().bor(res1, temp); + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let res = if idx & 1 == 0 { + fx.bcx.ins().fadd(mul, c_lane) + } else { + fx.bcx.ins().fsub(mul, c_lane) + }; + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); } - ret.place_lane(fx, 0).to_ptr().store(fx, res1, MemFlags::trusted()); - - let mut res2 = fx.bcx.ins().iconst(types::I64, 0); - for i in 64..=127 { - let mut temp = fx.bcx.ins().iconst(types::I64, 0); - for j in i - 63..=63 { - let x = extract_bit(fx, temp1, j); - let y = extract_bit(fx, temp2, i - j); - let z = fx.bcx.ins().band(x, y); - temp = fx.bcx.ins().bxor(temp, z); - } - let temp = fx.bcx.ins().ishl_imm(temp, i); - res2 = fx.bcx.ins().bor(res2, temp); + } + + "llvm.x86.fma.vfnmadd.ps" + | "llvm.x86.fma.vfnmadd.pd" + | "llvm.x86.fma.vfnmadd.ps.256" + | "llvm.x86.fma.vfnmadd.pd.256" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395 + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371 + intrinsic_args!(fx, args => (a, b, c); intrinsic); + + assert_eq!(a.layout(), b.layout()); + assert_eq!(a.layout(), c.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert!(lane_ty.is_floating_point()); + assert!(ret_lane_ty.is_floating_point()); + assert_eq!(lane_count, ret_lane_count); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + + for idx in 0..lane_count { + let a_lane = a.value_lane(fx, idx).load_scalar(fx); + let b_lane = b.value_lane(fx, idx).load_scalar(fx); + let c_lane = c.value_lane(fx, idx).load_scalar(fx); + + let mul = fx.bcx.ins().fmul(a_lane, b_lane); + let neg_mul = fx.bcx.ins().fneg(mul); + let res = fx.bcx.ins().fadd(neg_mul, c_lane); + + let res_lane = CValue::by_val(res, ret_lane_layout); + ret.place_lane(fx, idx).write_cvalue(fx, res_lane); } - ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted()); + } + + "llvm.x86.sse42.pcmpestri128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939 + intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let la = la.load_scalar(fx); + let b = b.load_scalar(fx); + let lb = lb.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4]) + { + imm8 + } else { + fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestri` is not a constant"); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pcmpestri xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + value: a, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + value: la, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + value: lb, + }, + // Implicit result of the pcmpestri intrinsic + CInlineAsmOperand::Out { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), + late: true, + place: Some(ret), + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sse42.pcmpestrm128" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm&ig_expand=940 + intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let la = la.load_scalar(fx); + let b = b.load_scalar(fx); + let lb = lb.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4]) + { + imm8 + } else { + fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestrm` is not a constant"); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pcmpestrm xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), + value: la, + }, + // Implicit argument to the pcmpestri intrinsic + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), + value: lb, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.pclmulqdq" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772 + intrinsic_args!(fx, args => (a, b, _imm8); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[2]) + { + imm8 + } else { + fx.tcx.sess.span_fatal( + span, + "Index argument for `_mm_clmulepi64_si128` is not a constant", + ); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("pclmulqdq xmm0, xmm1, {imm8}"))], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aeskeygenassist" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261 + intrinsic_args!(fx, args => (a, _imm8); intrinsic); + + let a = a.load_scalar(fx); + + let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1]) + { + imm8 + } else { + fx.tcx.sess.span_fatal( + span, + "Index argument for `_mm_aeskeygenassist_si128` is not a constant", + ); + }; + + let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8)); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesimc" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260 + intrinsic_args!(fx, args => (a); intrinsic); + + let a = a.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())], + &[CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesenc" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesenclast" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesdec" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.aesni.aesdeclast" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247 + intrinsic_args!(fx, args => (a, round_key); intrinsic); + + let a = a.load_scalar(fx); + let round_key = round_key.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + value: round_key, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256rnds2" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977 + intrinsic_args!(fx, args => (a, b, k); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + let k = k.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256rnds2 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + // Implicit argument to the sha256rnds2 instruction + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)), + value: k, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256msg1" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32&ig_expand=5975 + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256msg1 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); + } + + "llvm.x86.sha256msg2" => { + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32&ig_expand=5976 + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String("sha256msg2 xmm1, xmm2".to_string())], + &[ + CInlineAsmOperand::InOut { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)), + _late: true, + in_value: a, + out_place: Some(ret), + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)), + value: b, + }, + ], + InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM, + ); } "llvm.x86.avx.ptestz.256" => { diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index bfeeb117f..68126f124 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -487,13 +487,12 @@ fn codegen_regular_intrinsic_call<'tcx>( let layout = fx.layout_of(generic_args.type_at(0)); // Note: Can't use is_unsized here as truly unsized types need to take the fixed size // branch - let size = if let Abi::ScalarPair(_, _) = ptr.layout().abi { - let (_ptr, info) = ptr.load_scalar_pair(fx); - let (size, _align) = crate::unsize::size_and_align_of_dst(fx, layout, info); - size + let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi { + Some(ptr.load_scalar_pair(fx).1) } else { - fx.bcx.ins().iconst(fx.pointer_type, layout.size.bytes() as i64) + None }; + let (size, _align) = crate::unsize::size_and_align_of(fx, layout, meta); ret.write_cvalue(fx, CValue::by_val(size, usize_layout)); } sym::min_align_of_val => { @@ -502,13 +501,12 @@ fn codegen_regular_intrinsic_call<'tcx>( let layout = fx.layout_of(generic_args.type_at(0)); // Note: Can't use is_unsized here as truly unsized types need to take the fixed size // branch - let align = if let Abi::ScalarPair(_, _) = ptr.layout().abi { - let (_ptr, info) = ptr.load_scalar_pair(fx); - let (_size, align) = crate::unsize::size_and_align_of_dst(fx, layout, info); - align + let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi { + Some(ptr.load_scalar_pair(fx).1) } else { - fx.bcx.ins().iconst(fx.pointer_type, layout.align.abi.bytes() as i64) + None }; + let (_size, align) = crate::unsize::size_and_align_of(fx, layout, meta); ret.write_cvalue(fx, CValue::by_val(align, usize_layout)); } @@ -688,7 +686,7 @@ fn codegen_regular_intrinsic_call<'tcx>( } }) }); - crate::base::codegen_panic_nounwind(fx, &msg_str, source_info); + crate::base::codegen_panic_nounwind(fx, &msg_str, Some(source_info.span)); return; } } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index ea137c4ca..fe4f073f7 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -1,7 +1,6 @@ //! Codegen `extern "platform-intrinsic"` intrinsics. -use rustc_middle::ty::GenericArgsRef; -use rustc_span::Symbol; +use cranelift_codegen::ir::immediates::Offset32; use rustc_target::abi::Endian; use super::*; @@ -282,11 +281,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant"); }; - let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + let idx: u32 = idx_const + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count), @@ -331,10 +330,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count), @@ -1008,8 +1007,57 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( } } + sym::simd_masked_load => { + intrinsic_args!(fx, args => (mask, ptr, val); intrinsic); + + let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); + let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(val_lane_count, mask_lane_count); + assert_eq!(val_lane_count, ret_lane_count); + + let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap(); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + let ptr_val = ptr.load_scalar(fx); + + for lane_idx in 0..ret_lane_count { + let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx); + + let if_enabled = fx.bcx.create_block(); + let if_disabled = fx.bcx.create_block(); + let next = fx.bcx.create_block(); + let res_lane = fx.bcx.append_block_param(next, lane_clif_ty); + + fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]); + fx.bcx.seal_block(if_enabled); + fx.bcx.seal_block(if_disabled); + + fx.bcx.switch_to_block(if_enabled); + let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32; + let res = fx.bcx.ins().load( + lane_clif_ty, + MemFlags::trusted(), + ptr_val, + Offset32::new(offset), + ); + fx.bcx.ins().jump(next, &[res]); + + fx.bcx.switch_to_block(if_disabled); + fx.bcx.ins().jump(next, &[val_lane]); + + fx.bcx.seal_block(next); + fx.bcx.switch_to_block(next); + + fx.bcx.ins().nop(); + + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout)); + } + } + sym::simd_scatter => { - intrinsic_args!(fx, args => (val, ptr, mask); intrinsic); + intrinsic_args!(fx, args => (mask, ptr, val); intrinsic); let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx); diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs index da84e54a9..196418023 100644 --- a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs +++ b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs @@ -58,11 +58,10 @@ use std::fmt; use std::io::Write; -use cranelift_codegen::{ - entity::SecondaryMap, - ir::entities::AnyEntity, - write::{FuncWriter, PlainWriter}, -}; +use cranelift_codegen::entity::SecondaryMap; +use cranelift_codegen::ir::entities::AnyEntity; +use cranelift_codegen::ir::Fact; +use cranelift_codegen::write::{FuncWriter, PlainWriter}; use rustc_middle::ty::layout::FnAbiOf; use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_session::config::{OutputFilenames, OutputType}; @@ -155,8 +154,13 @@ impl FuncWriter for &'_ CommentWriter { _func: &Function, entity: AnyEntity, value: &dyn fmt::Display, + maybe_fact: Option<&Fact>, ) -> fmt::Result { - write!(w, " {} = {}", entity, value)?; + if let Some(fact) = maybe_fact { + write!(w, " {} ! {} = {}", entity, fact, value)?; + } else { + write!(w, " {} = {}", entity, value)?; + } if let Some(comment) = self.entity_comments.get(&entity) { writeln!(w, " ; {}", comment.replace('\n', "\n; ")) @@ -227,9 +231,8 @@ pub(crate) fn write_ir_file( let res = std::fs::File::create(clif_file_name).and_then(|mut file| write(&mut file)); if let Err(err) = res { // Using early_warn as no Session is available here - let handler = rustc_session::EarlyErrorHandler::new( - rustc_session::config::ErrorOutputType::default(), - ); + let handler = + rustc_session::EarlyDiagCtxt::new(rustc_session::config::ErrorOutputType::default()); handler.early_warn(format!("error writing ir file: {}", err)); } } diff --git a/compiler/rustc_codegen_cranelift/src/unsize.rs b/compiler/rustc_codegen_cranelift/src/unsize.rs index c6133f2b3..f777e1137 100644 --- a/compiler/rustc_codegen_cranelift/src/unsize.rs +++ b/compiler/rustc_codegen_cranelift/src/unsize.rs @@ -2,6 +2,9 @@ //! //! [`PointerCoercion::Unsize`]: `rustc_middle::ty::adjustment::PointerCoercion::Unsize` +use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths}; + +use crate::base::codegen_panic_nounwind; use crate::prelude::*; // Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/base.rs#L159-L307 @@ -187,63 +190,113 @@ pub(crate) fn coerce_dyn_star<'tcx>( // Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/glue.rs -pub(crate) fn size_and_align_of_dst<'tcx>( +pub(crate) fn size_and_align_of<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, layout: TyAndLayout<'tcx>, - info: Value, + info: Option<Value>, ) -> (Value, Value) { - assert!(layout.is_unsized() || layout.abi == Abi::Uninhabited); - match layout.ty.kind() { + if layout.is_sized() { + return ( + fx.bcx.ins().iconst(fx.pointer_type, layout.size.bytes() as i64), + fx.bcx.ins().iconst(fx.pointer_type, layout.align.abi.bytes() as i64), + ); + } + + let ty = layout.ty; + match ty.kind() { ty::Dynamic(..) => { // load size/align from vtable - (crate::vtable::size_of_obj(fx, info), crate::vtable::min_align_of_obj(fx, info)) + ( + crate::vtable::size_of_obj(fx, info.unwrap()), + crate::vtable::min_align_of_obj(fx, info.unwrap()), + ) } ty::Slice(_) | ty::Str => { let unit = layout.field(fx, 0); // The info in this case is the length of the str, so the size is that // times the unit size. ( - fx.bcx.ins().imul_imm(info, unit.size.bytes() as i64), + fx.bcx.ins().imul_imm(info.unwrap(), unit.size.bytes() as i64), fx.bcx.ins().iconst(fx.pointer_type, unit.align.abi.bytes() as i64), ) } - _ => { + ty::Foreign(_) => { + let trap_block = fx.bcx.create_block(); + let true_ = fx.bcx.ins().iconst(types::I8, 1); + let next_block = fx.bcx.create_block(); + fx.bcx.ins().brif(true_, trap_block, &[], next_block, &[]); + fx.bcx.seal_block(trap_block); + fx.bcx.seal_block(next_block); + fx.bcx.switch_to_block(trap_block); + + // `extern` type. We cannot compute the size, so panic. + let msg_str = with_no_visible_paths!({ + with_no_trimmed_paths!({ + format!("attempted to compute the size or alignment of extern type `{ty}`") + }) + }); + + codegen_panic_nounwind(fx, &msg_str, None); + + fx.bcx.switch_to_block(next_block); + + // This function does not return so we can now return whatever we want. + let size = fx.bcx.ins().iconst(fx.pointer_type, 42); + let align = fx.bcx.ins().iconst(fx.pointer_type, 42); + (size, align) + } + ty::Adt(..) | ty::Tuple(..) => { // First get the size of all statically known fields. // Don't use size_of because it also rounds up to alignment, which we // want to avoid, as the unsized field's alignment could be smaller. assert!(!layout.ty.is_simd()); let i = layout.fields.count() - 1; - let sized_size = layout.fields.offset(i).bytes(); + let unsized_offset_unadjusted = layout.fields.offset(i).bytes(); + let unsized_offset_unadjusted = + fx.bcx.ins().iconst(fx.pointer_type, unsized_offset_unadjusted as i64); let sized_align = layout.align.abi.bytes(); let sized_align = fx.bcx.ins().iconst(fx.pointer_type, sized_align as i64); // Recurse to get the size of the dynamically sized field (must be // the last field). let field_layout = layout.field(fx, i); - let (unsized_size, mut unsized_align) = size_and_align_of_dst(fx, field_layout, info); - - // FIXME (#26403, #27023): We should be adding padding - // to `sized_size` (to accommodate the `unsized_align` - // required of the unsized field that follows) before - // summing it with `sized_size`. (Note that since #26403 - // is unfixed, we do not yet add the necessary padding - // here. But this is where the add would go.) - - // Return the sum of sizes and max of aligns. - let size = fx.bcx.ins().iadd_imm(unsized_size, sized_size as i64); - - // Packed types ignore the alignment of their fields. - if let ty::Adt(def, _) = layout.ty.kind() { - if def.repr().packed() { - unsized_align = sized_align; + let (unsized_size, mut unsized_align) = size_and_align_of(fx, field_layout, info); + + // # First compute the dynamic alignment + + // For packed types, we need to cap the alignment. + if let ty::Adt(def, _) = ty.kind() { + if let Some(packed) = def.repr().pack { + if packed.bytes() == 1 { + // We know this will be capped to 1. + unsized_align = fx.bcx.ins().iconst(fx.pointer_type, 1); + } else { + // We have to dynamically compute `min(unsized_align, packed)`. + let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64); + let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed); + unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed); + } } } // Choose max of two known alignments (combined value must // be aligned according to more restrictive of the two). let cmp = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, sized_align, unsized_align); - let align = fx.bcx.ins().select(cmp, sized_align, unsized_align); + let full_align = fx.bcx.ins().select(cmp, sized_align, unsized_align); + + // # Then compute the dynamic size + + // The full formula for the size would be: + // let unsized_offset_adjusted = unsized_offset_unadjusted.align_to(unsized_align); + // let full_size = (unsized_offset_adjusted + unsized_size).align_to(full_align); + // However, `unsized_size` is a multiple of `unsized_align`. + // Therefore, we can equivalently do the `align_to(unsized_align)` *after* adding `unsized_size`: + // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(unsized_align).align_to(full_align); + // Furthermore, `align >= unsized_align`, and therefore we only need to do: + // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(full_align); + + let full_size = fx.bcx.ins().iadd(unsized_offset_unadjusted, unsized_size); // Issue #27023: must add any necessary padding to `size` // (to make it a multiple of `align`) before returning it. @@ -255,12 +308,13 @@ pub(crate) fn size_and_align_of_dst<'tcx>( // emulated via the semi-standard fast bit trick: // // `(size + (align-1)) & -align` - let addend = fx.bcx.ins().iadd_imm(align, -1); - let add = fx.bcx.ins().iadd(size, addend); - let neg = fx.bcx.ins().ineg(align); - let size = fx.bcx.ins().band(add, neg); + let addend = fx.bcx.ins().iadd_imm(full_align, -1); + let add = fx.bcx.ins().iadd(full_size, addend); + let neg = fx.bcx.ins().ineg(full_align); + let full_size = fx.bcx.ins().band(add, neg); - (size, align) + (full_size, full_align) } + _ => bug!("size_and_align_of_dst: {ty} not supported"), } } diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs index 21ad2a835..567a5669d 100644 --- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs +++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs @@ -20,34 +20,36 @@ fn codegen_field<'tcx>( (base.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()), field_layout) }; - if let Some(extra) = extra { - if field_layout.is_sized() { - return simple(fx); - } - match field_layout.ty.kind() { - ty::Slice(..) | ty::Str | ty::Foreign(..) => simple(fx), - ty::Adt(def, _) if def.repr().packed() => { - assert_eq!(layout.align.abi.bytes(), 1); - simple(fx) - } - _ => { - // We have to align the offset for DST's - let unaligned_offset = field_offset.bytes(); - let (_, unsized_align) = - crate::unsize::size_and_align_of_dst(fx, field_layout, extra); + if field_layout.is_sized() { + return simple(fx); + } + match field_layout.ty.kind() { + ty::Slice(..) | ty::Str => simple(fx), + _ => { + let unaligned_offset = field_offset.bytes(); - let one = fx.bcx.ins().iconst(fx.pointer_type, 1); - let align_sub_1 = fx.bcx.ins().isub(unsized_align, one); - let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64); - let zero = fx.bcx.ins().iconst(fx.pointer_type, 0); - let and_rhs = fx.bcx.ins().isub(zero, unsized_align); - let offset = fx.bcx.ins().band(and_lhs, and_rhs); + // Get the alignment of the field + let (_, mut unsized_align) = crate::unsize::size_and_align_of(fx, field_layout, extra); - (base.offset_value(fx, offset), field_layout) + // For packed types, we need to cap alignment. + if let ty::Adt(def, _) = layout.ty.kind() { + if let Some(packed) = def.repr().pack { + let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64); + let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed); + unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed); + } } + + // Bump the unaligned offset up to the appropriate alignment + let one = fx.bcx.ins().iconst(fx.pointer_type, 1); + let align_sub_1 = fx.bcx.ins().isub(unsized_align, one); + let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64); + let zero = fx.bcx.ins().iconst(fx.pointer_type, 0); + let and_rhs = fx.bcx.ins().isub(zero, unsized_align); + let offset = fx.bcx.ins().band(and_lhs, and_rhs); + + (base.offset_value(fx, offset), field_layout) } - } else { - simple(fx) } } @@ -329,7 +331,13 @@ impl<'tcx> CValue<'tcx> { let msb = fx.bcx.ins().iconst(types::I64, (const_val >> 64) as u64 as i64); fx.bcx.ins().iconcat(lsb, msb) } - ty::Bool | ty::Char | ty::Uint(_) | ty::Int(_) | ty::Ref(..) | ty::RawPtr(..) => { + ty::Bool + | ty::Char + | ty::Uint(_) + | ty::Int(_) + | ty::Ref(..) + | ty::RawPtr(..) + | ty::FnPtr(..) => { let raw_val = const_val.size().truncate(const_val.to_bits(layout.size).unwrap()); fx.bcx.ins().iconst(clif_ty, raw_val as i64) } @@ -725,13 +733,8 @@ impl<'tcx> CPlace<'tcx> { }; let (field_ptr, field_layout) = codegen_field(fx, base, extra, layout, field); - if field_layout.is_unsized() { - if let ty::Foreign(_) = field_layout.ty.kind() { - assert!(extra.is_none()); - CPlace::for_ptr(field_ptr, field_layout) - } else { - CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout) - } + if has_ptr_meta(fx.tcx, field_layout.ty) { + CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout) } else { CPlace::for_ptr(field_ptr, field_layout) } @@ -971,6 +974,32 @@ pub(crate) fn assert_assignable<'tcx>( } } } + (&ty::Coroutine(def_id_a, args_a, mov_a), &ty::Coroutine(def_id_b, args_b, mov_b)) + if def_id_a == def_id_b && mov_a == mov_b => + { + let mut types_a = args_a.types(); + let mut types_b = args_b.types(); + loop { + match (types_a.next(), types_b.next()) { + (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1), + (None, None) => return, + (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty), + } + } + } + (&ty::CoroutineWitness(def_id_a, args_a), &ty::CoroutineWitness(def_id_b, args_b)) + if def_id_a == def_id_b => + { + let mut types_a = args_a.types(); + let mut types_b = args_b.types(); + loop { + match (types_a.next(), types_b.next()) { + (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1), + (None, None) => return, + (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty), + } + } + } (ty::Param(_), _) | (_, ty::Param(_)) if fx.tcx.sess.opts.unstable_opts.polymorphize => { // No way to check if it is correct or not with polymorphization enabled } diff --git a/compiler/rustc_codegen_cranelift/y.cmd b/compiler/rustc_codegen_cranelift/y.cmd new file mode 100644 index 000000000..e9b688645 --- /dev/null +++ b/compiler/rustc_codegen_cranelift/y.cmd @@ -0,0 +1,9 @@ +@echo off +echo [BUILD] build system >&2 +mkdir build 2>nul +rustc build_system/main.rs -o build\y.exe -Cdebuginfo=1 --edition 2021 || goto :error +build\y.exe %* || goto :error +goto :EOF + +:error +exit /b diff --git a/compiler/rustc_codegen_cranelift/y.ps1 b/compiler/rustc_codegen_cranelift/y.ps1 new file mode 100644 index 000000000..02ef0fcbd --- /dev/null +++ b/compiler/rustc_codegen_cranelift/y.ps1 @@ -0,0 +1,12 @@ +$ErrorActionPreference = "Stop" + +$host.ui.WriteErrorLine("[BUILD] build system") +New-Item -ItemType Directory -Force -Path build | Out-Null +& rustc build_system/main.rs -o build\y.exe -Cdebuginfo=1 --edition 2021 +if ($LASTEXITCODE -ne 0) { + exit $LASTEXITCODE +} +& build\y.exe $args +if ($LASTEXITCODE -ne 0) { + exit $LASTEXITCODE +} |