From 023939b627b7dc93b01471f7d41fb8553ddb4ffa Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 05:59:24 +0200 Subject: Merging upstream version 1.73.0+dfsg1. Signed-off-by: Daniel Baumann --- tests/codegen/simd/simd-wide-sum.rs | 59 +++++++++++++++++++++++++++++++++ tests/codegen/simd/simd_arith_offset.rs | 26 +++++++++++++++ tests/codegen/simd/swap-simd-types.rs | 41 +++++++++++++++++++++++ tests/codegen/simd/unpadded-simd.rs | 14 ++++++++ 4 files changed, 140 insertions(+) create mode 100644 tests/codegen/simd/simd-wide-sum.rs create mode 100644 tests/codegen/simd/simd_arith_offset.rs create mode 100644 tests/codegen/simd/swap-simd-types.rs create mode 100644 tests/codegen/simd/unpadded-simd.rs (limited to 'tests/codegen/simd') diff --git a/tests/codegen/simd/simd-wide-sum.rs b/tests/codegen/simd/simd-wide-sum.rs new file mode 100644 index 000000000..3116f9597 --- /dev/null +++ b/tests/codegen/simd/simd-wide-sum.rs @@ -0,0 +1,59 @@ +// revisions: llvm mir-opt3 +// compile-flags: -C opt-level=3 -Z merge-functions=disabled --edition=2021 +// only-x86_64 +// ignore-debug: the debug assertions get in the way +// [mir-opt3]compile-flags: -Zmir-opt-level=3 +// [mir-opt3]build-pass + +// mir-opt3 is a regression test for https://github.com/rust-lang/rust/issues/98016 + +#![crate_type = "lib"] +#![feature(portable_simd)] + +use std::simd::{Simd, SimdUint}; +const N: usize = 8; + +#[no_mangle] +// CHECK-LABEL: @wider_reduce_simd +pub fn wider_reduce_simd(x: Simd<u8, N>) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + let x: Simd<u16, N> = x.cast(); + x.reduce_sum() +} + +#[no_mangle] +// CHECK-LABEL: @wider_reduce_loop +pub fn wider_reduce_loop(x: Simd<u8, N>) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + let mut sum = 0_u16; + for i in 0..N { + sum += u16::from(x[i]); + } + sum +} + +#[no_mangle] +// CHECK-LABEL: 
@wider_reduce_iter +pub fn wider_reduce_iter(x: Simd<u8, N>) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + x.as_array().iter().copied().map(u16::from).sum() +} + +// This iterator one is the most interesting, as it's the one +// which used to not auto-vectorize due to a suboptimality in the +// `<array::IntoIter as Iterator>::fold` implementation. + +#[no_mangle] +// CHECK-LABEL: @wider_reduce_into_iter +pub fn wider_reduce_into_iter(x: Simd<u8, N>) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + x.to_array().into_iter().map(u16::from).sum() +} diff --git a/tests/codegen/simd/simd_arith_offset.rs b/tests/codegen/simd/simd_arith_offset.rs new file mode 100644 index 000000000..1ee73de11 --- /dev/null +++ b/tests/codegen/simd/simd_arith_offset.rs @@ -0,0 +1,26 @@ +// compile-flags: -C no-prepopulate-passes +// only-64bit (because the LLVM type of i64 for usize shows up) +// + +#![crate_type = "lib"] +#![feature(repr_simd, platform_intrinsics)] + +extern "platform-intrinsic" { + pub(crate) fn simd_arith_offset<T, U>(ptrs: T, offsets: U) -> T; +} + +/// A vector of *const T. 
+#[derive(Debug, Copy, Clone)] +#[repr(simd)] +pub struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]); + +#[derive(Debug, Copy, Clone)] +#[repr(simd)] +pub struct Simd<T, const LANES: usize>([T; LANES]); + +// CHECK-LABEL: smoke +#[no_mangle] +pub fn smoke(ptrs: SimdConstPtr<u8, 8>, offsets: Simd<usize, 8>) -> SimdConstPtr<u8, 8> { + // CHECK: getelementptr i8, <8 x ptr> %0, <8 x i64> %1 + unsafe { simd_arith_offset(ptrs, offsets) } +} diff --git a/tests/codegen/simd/swap-simd-types.rs b/tests/codegen/simd/swap-simd-types.rs new file mode 100644 index 000000000..3472a42b0 --- /dev/null +++ b/tests/codegen/simd/swap-simd-types.rs @@ -0,0 +1,41 @@ +// compile-flags: -O -C target-feature=+avx +// only-x86_64 +// ignore-debug: the debug assertions get in the way + +#![crate_type = "lib"] + +use std::mem::swap; + +// SIMD types are highly-aligned already, so make sure the swap code leaves their +// types alone and doesn't pessimize them (such as by swapping them as `usize`s). +extern crate core; +use core::arch::x86_64::__m256; + +// CHECK-LABEL: @swap_single_m256 +#[no_mangle] +pub fn swap_single_m256(x: &mut __m256, y: &mut __m256) { +// CHECK-NOT: alloca +// CHECK: load <8 x float>{{.+}}align 32 +// CHECK: store <8 x float>{{.+}}align 32 + swap(x, y) +} + +// CHECK-LABEL: @swap_m256_slice +#[no_mangle] +pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) { +// CHECK-NOT: alloca +// CHECK: load <8 x float>{{.+}}align 32 +// CHECK: store <8 x float>{{.+}}align 32 + if x.len() == y.len() { + x.swap_with_slice(y); + } +} + +// CHECK-LABEL: @swap_bytes32 +#[no_mangle] +pub fn swap_bytes32(x: &mut [u8; 32], y: &mut [u8; 32]) { +// CHECK-NOT: alloca +// CHECK: load <32 x i8>{{.+}}align 1 +// CHECK: store <32 x i8>{{.+}}align 1 + swap(x, y) +} diff --git a/tests/codegen/simd/unpadded-simd.rs b/tests/codegen/simd/unpadded-simd.rs new file mode 100644 index 000000000..eb44dbd93 --- /dev/null +++ b/tests/codegen/simd/unpadded-simd.rs @@ -0,0 +1,14 @@ +// Make sure that no 0-sized padding is inserted in structs and that +// 
structs are represented as expected by Neon intrinsics in LLVM. +// See #87254. + +#![crate_type = "lib"] +#![feature(repr_simd)] + +#[derive(Copy, Clone, Debug)] +#[repr(simd)] +pub struct int16x4_t(pub i16, pub i16, pub i16, pub i16); + +#[derive(Copy, Clone, Debug)] +pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t); +// CHECK: %int16x4x2_t = type { <4 x i16>, <4 x i16> } -- cgit v1.2.3