diff options
Diffstat (limited to 'src/test/codegen/swap-large-types.rs')
-rw-r--r-- | src/test/codegen/swap-large-types.rs | 91 |
1 files changed, 0 insertions, 91 deletions
diff --git a/src/test/codegen/swap-large-types.rs b/src/test/codegen/swap-large-types.rs deleted file mode 100644 index 4a6840357..000000000 --- a/src/test/codegen/swap-large-types.rs +++ /dev/null @@ -1,91 +0,0 @@ -// compile-flags: -O -// only-x86_64 -// ignore-debug: the debug assertions get in the way - -#![crate_type = "lib"] - -use std::mem::swap; -use std::ptr::{read, copy_nonoverlapping, write}; - -type KeccakBuffer = [[u64; 5]; 5]; - -// A basic read+copy+write swap implementation ends up copying one of the values -// to stack for large types, which is completely unnecessary as the lack of -// overlap means we can just do whatever fits in registers at a time. - -// CHECK-LABEL: @swap_basic -#[no_mangle] -pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { -// CHECK: alloca [5 x [5 x i64]] - - // SAFETY: exclusive references are always valid to read/write, - // are non-overlapping, and nothing here panics so it's drop-safe. - unsafe { - let z = read(x); - copy_nonoverlapping(y, x, 1); - write(y, z); - } -} - -// This test verifies that the library does something smarter, and thus -// doesn't need any scratch space on the stack. - -// CHECK-LABEL: @swap_std -#[no_mangle] -pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i64> -// CHECK: store <{{[0-9]+}} x i64> - swap(x, y) -} - -// Verify that types with usize alignment are swapped via vectored usizes, -// not falling back to byte-level code. - -// CHECK-LABEL: @swap_slice -#[no_mangle] -pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i64> -// CHECK: store <{{[0-9]+}} x i64> - if x.len() == y.len() { - x.swap_with_slice(y); - } -} - -// But for a large align-1 type, vectorized byte copying is what we want. - -type OneKilobyteBuffer = [u8; 1024]; - -// CHECK-LABEL: @swap_1kb_slices -#[no_mangle] -pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) { -// CHECK-NOT: alloca -// CHECK: load <{{[0-9]+}} x i8> -// CHECK: store <{{[0-9]+}} x i8> - if x.len() == y.len() { - x.swap_with_slice(y); - } -} - -// This verifies that the 2×read + 2×write optimizes to just 3 memcpys -// for an unusual type like this. It's not clear whether we should do anything -// smarter in Rust for these, so for now it's fine to leave these up to the backend. -// That's not as bad as it might seem, as for example, LLVM will lower the -// memcpys below to VMOVAPS on YMMs if one enables the AVX target feature. -// Eventually we'll be able to pass `align_of::<T>` to a const generic and -// thus pick a smarter chunk size ourselves without huge code duplication. - -#[repr(align(64))] -pub struct BigButHighlyAligned([u8; 64 * 3]); - -// CHECK-LABEL: @swap_big_aligned -#[no_mangle] -pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) { -// CHECK-NOT: call void @llvm.memcpy -// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192) -// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192) -// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192) -// CHECK-NOT: call void @llvm.memcpy - swap(x, y) -} |