Diffstat (limited to 'compiler/rustc_codegen_cranelift/src/intrinsics')
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs  200
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs        23
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs      138
3 files changed, 327 insertions, 34 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 56d8f13ce..24ad0083a 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -19,7 +19,10 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
}
// Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
- "llvm.x86.sse2.pmovmskb.128" | "llvm.x86.avx2.pmovmskb" | "llvm.x86.sse2.movmsk.pd" => {
+ "llvm.x86.sse2.pmovmskb.128"
+ | "llvm.x86.avx2.pmovmskb"
+ | "llvm.x86.sse.movmsk.ps"
+ | "llvm.x86.sse2.movmsk.pd" => {
intrinsic_args!(fx, args => (a); intrinsic);
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
@@ -107,16 +110,209 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
};
let a = codegen_operand(fx, a);
let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+ .expect("llvm.x86.sse2.pslli.d imm8 not const");
+
+ simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+ .try_to_bits(Size::from_bytes(4))
+ .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+ {
+ imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
+ _ => fx.bcx.ins().iconst(types::I32, 0),
+ });
+ }
+ "llvm.x86.sse2.psrli.w" => {
+ let (a, imm8) = match args {
+ [a, imm8] => (a, imm8),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
.expect("llvm.x86.sse2.psrli.d imm8 not const");
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
.try_to_bits(Size::from_bytes(4))
.unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
{
+ imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
+ _ => fx.bcx.ins().iconst(types::I32, 0),
+ });
+ }
+ "llvm.x86.sse2.pslli.w" => {
+ let (a, imm8) = match args {
+ [a, imm8] => (a, imm8),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+ .expect("llvm.x86.sse2.pslli.d imm8 not const");
+
+ simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+ .try_to_bits(Size::from_bytes(4))
+ .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+ {
+ imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
+ _ => fx.bcx.ins().iconst(types::I32, 0),
+ });
+ }
+ "llvm.x86.avx.psrli.d" => {
+ let (a, imm8) = match args {
+ [a, imm8] => (a, imm8),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+ .expect("llvm.x86.avx.psrli.d imm8 not const");
+
+ simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+ .try_to_bits(Size::from_bytes(4))
+ .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+ {
+ imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
+ _ => fx.bcx.ins().iconst(types::I32, 0),
+ });
+ }
+ "llvm.x86.avx.pslli.d" => {
+ let (a, imm8) = match args {
+ [a, imm8] => (a, imm8),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+ .expect("llvm.x86.avx.pslli.d imm8 not const");
+
+ simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+ .try_to_bits(Size::from_bytes(4))
+ .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+ {
imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
_ => fx.bcx.ins().iconst(types::I32, 0),
});
}
+ "llvm.x86.avx2.psrli.w" => {
+ let (a, imm8) = match args {
+ [a, imm8] => (a, imm8),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+ .expect("llvm.x86.avx.psrli.w imm8 not const");
+
+ simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+ .try_to_bits(Size::from_bytes(4))
+ .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+ {
+ imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
+ _ => fx.bcx.ins().iconst(types::I32, 0),
+ });
+ }
+ "llvm.x86.avx2.pslli.w" => {
+ let (a, imm8) = match args {
+ [a, imm8] => (a, imm8),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+ .expect("llvm.x86.avx.pslli.w imm8 not const");
+
+ simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+ .try_to_bits(Size::from_bytes(4))
+ .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+ {
+ imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
+ _ => fx.bcx.ins().iconst(types::I32, 0),
+ });
+ }
+ "llvm.x86.ssse3.pshuf.b.128" | "llvm.x86.avx2.pshuf.b" => {
+ let (a, b) = match args {
+ [a, b] => (a, b),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let b = codegen_operand(fx, b);
+
+ // Based on the pseudocode at https://github.com/rust-lang/stdarch/blob/1cfbca8b38fd9b4282b2f054f61c6ca69fc7ce29/crates/core_arch/src/x86/avx2.rs#L2319-L2332
+ let zero = fx.bcx.ins().iconst(types::I8, 0);
+ for i in 0..16 {
+ let b_lane = b.value_lane(fx, i).load_scalar(fx);
+ let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
+ let a_idx = fx.bcx.ins().band_imm(b_lane, 0xf);
+ let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
+ let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
+ let res = fx.bcx.ins().select(is_zero, zero, a_lane);
+ ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+ }
+
+ if intrinsic == "llvm.x86.avx2.pshuf.b" {
+ for i in 16..32 {
+ let b_lane = b.value_lane(fx, i).load_scalar(fx);
+ let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
+ let b_lane_masked = fx.bcx.ins().band_imm(b_lane, 0xf);
+ let a_idx = fx.bcx.ins().iadd_imm(b_lane_masked, 16);
+ let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
+ let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
+ let res = fx.bcx.ins().select(is_zero, zero, a_lane);
+ ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+ }
+ }
+ }
+ "llvm.x86.avx2.vperm2i128" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256
+ let (a, b, imm8) = match args {
+ [a, b, imm8] => (a, b, imm8),
+ _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+ };
+ let a = codegen_operand(fx, a);
+ let b = codegen_operand(fx, b);
+ let imm8 = codegen_operand(fx, imm8).load_scalar(fx);
+
+ let a_0 = a.value_lane(fx, 0).load_scalar(fx);
+ let a_1 = a.value_lane(fx, 1).load_scalar(fx);
+ let a_low = fx.bcx.ins().iconcat(a_0, a_1);
+ let a_2 = a.value_lane(fx, 2).load_scalar(fx);
+ let a_3 = a.value_lane(fx, 3).load_scalar(fx);
+ let a_high = fx.bcx.ins().iconcat(a_2, a_3);
+
+ let b_0 = b.value_lane(fx, 0).load_scalar(fx);
+ let b_1 = b.value_lane(fx, 1).load_scalar(fx);
+ let b_low = fx.bcx.ins().iconcat(b_0, b_1);
+ let b_2 = b.value_lane(fx, 2).load_scalar(fx);
+ let b_3 = b.value_lane(fx, 3).load_scalar(fx);
+ let b_high = fx.bcx.ins().iconcat(b_2, b_3);
+
+ fn select4(
+ fx: &mut FunctionCx<'_, '_, '_>,
+ a_high: Value,
+ a_low: Value,
+ b_high: Value,
+ b_low: Value,
+ control: Value,
+ ) -> Value {
+ let a_or_b = fx.bcx.ins().band_imm(control, 0b0010);
+ let high_or_low = fx.bcx.ins().band_imm(control, 0b0001);
+ let is_zero = fx.bcx.ins().band_imm(control, 0b1000);
+
+ let zero = fx.bcx.ins().iconst(types::I64, 0);
+ let zero = fx.bcx.ins().iconcat(zero, zero);
+
+ let res_a = fx.bcx.ins().select(high_or_low, a_high, a_low);
+ let res_b = fx.bcx.ins().select(high_or_low, b_high, b_low);
+ let res = fx.bcx.ins().select(a_or_b, res_b, res_a);
+ fx.bcx.ins().select(is_zero, zero, res)
+ }
+
+ let control0 = imm8;
+ let res_low = select4(fx, a_high, a_low, b_high, b_low, control0);
+ let (res_0, res_1) = fx.bcx.ins().isplit(res_low);
+
+ let control1 = fx.bcx.ins().ushr_imm(imm8, 4);
+ let res_high = select4(fx, a_high, a_low, b_high, b_low, control1);
+ let (res_2, res_3) = fx.bcx.ins().isplit(res_high);
+
+ ret.place_lane(fx, 0).to_ptr().store(fx, res_0, MemFlags::trusted());
+ ret.place_lane(fx, 1).to_ptr().store(fx, res_1, MemFlags::trusted());
+ ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted());
+ ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted());
+ }
"llvm.x86.sse2.storeu.dq" => {
intrinsic_args!(fx, args => (mem_addr, a); intrinsic);
let mem_addr = mem_addr.load_scalar(fx);
@@ -190,7 +386,7 @@ fn llvm_add_sub<'tcx>(
// carry0 | carry1 -> carry or borrow respectively
let cb_out = fx.bcx.ins().bor(cb0, cb1);
- let layout = fx.layout_of(fx.tcx.mk_tup(&[fx.tcx.types.u8, fx.tcx.types.u64]));
+ let layout = fx.layout_of(Ty::new_tup(fx.tcx, &[fx.tcx.types.u8, fx.tcx.types.u64]));
let val = CValue::by_val_pair(cb_out, c, layout);
ret.write_cvalue(fx, val);
}
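
Note: the pshufb lowering added above implements the byte-shuffle semantics of `_mm_shuffle_epi8`. A minimal scalar model of the per-byte rule, under the illustrative name `pshufb_128_model` (not part of the patch):

fn pshufb_128_model(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
    let mut out = [0u8; 16];
    for i in 0..16 {
        // Bit 7 of each control byte selects zero; otherwise the low
        // 4 bits index into `a`. This mirrors the band_imm/select pair
        // the handler emits per lane.
        out[i] = if b[i] & 0x80 != 0 { 0 } else { a[(b[i] & 0x0f) as usize] };
    }
    out
}

The `llvm.x86.avx2.pshuf.b` form applies the same rule to each 128-bit half independently, which is why the handler offsets the index by 16 for lanes 16..32 instead of shuffling across halves.
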
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index 0a513b08b..5862f1829 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -472,28 +472,11 @@ fn codegen_regular_intrinsic_call<'tcx>(
ret.write_cvalue(fx, CValue::by_val(align, usize_layout));
}
- sym::unchecked_add
- | sym::unchecked_sub
- | sym::unchecked_mul
- | sym::unchecked_div
- | sym::exact_div
- | sym::unchecked_rem
- | sym::unchecked_shl
- | sym::unchecked_shr => {
+ sym::exact_div => {
intrinsic_args!(fx, args => (x, y); intrinsic);
- // FIXME trap on overflow
- let bin_op = match intrinsic {
- sym::unchecked_add => BinOp::Add,
- sym::unchecked_sub => BinOp::Sub,
- sym::unchecked_mul => BinOp::Mul,
- sym::unchecked_div | sym::exact_div => BinOp::Div,
- sym::unchecked_rem => BinOp::Rem,
- sym::unchecked_shl => BinOp::Shl,
- sym::unchecked_shr => BinOp::Shr,
- _ => unreachable!(),
- };
- let res = crate::num::codegen_int_binop(fx, bin_op, x, y);
+ // FIXME trap on inexact
+ let res = crate::num::codegen_int_binop(fx, BinOp::Div, x, y);
ret.write_cvalue(fx, res);
}
sym::saturating_add | sym::saturating_sub => {
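
Note: as the rewritten FIXME says, `exact_div` is currently lowered to a plain integer division without trapping on an inexact result. A scalar model of the contract the intrinsic assumes, under the illustrative name `exact_div_model` (not part of the patch):

fn exact_div_model(x: i64, y: i64) -> i64 {
    // exact_div requires y != 0 and x % y == 0; violating either is
    // undefined behaviour. The lowering above omits this check, which
    // is what the FIXME records.
    debug_assert!(y != 0 && x % y == 0, "exact_div on inexact operands");
    x / y
}
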
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index 5a038bfca..6741362e8 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -434,8 +434,36 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
});
}
- sym::simd_round => {
- intrinsic_args!(fx, args => (a); intrinsic);
+ sym::simd_fpow => {
+ intrinsic_args!(fx, args => (a, b); intrinsic);
+
+ if !a.layout().ty.is_simd() {
+ report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+ return;
+ }
+
+ simd_pair_for_each_lane(fx, a, b, ret, &|fx, lane_ty, _ret_lane_ty, a_lane, b_lane| {
+ match lane_ty.kind() {
+ ty::Float(FloatTy::F32) => fx.lib_call(
+ "powf",
+ vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+ vec![AbiParam::new(types::F32)],
+ &[a_lane, b_lane],
+ )[0],
+ ty::Float(FloatTy::F64) => fx.lib_call(
+ "pow",
+ vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
+ vec![AbiParam::new(types::F64)],
+ &[a_lane, b_lane],
+ )[0],
+ _ => unreachable!("{:?}", lane_ty),
+ }
+ });
+ }
+
+ sym::simd_fpowi => {
+ intrinsic_args!(fx, args => (a, exp); intrinsic);
+ let exp = exp.load_scalar(fx);
if !a.layout().ty.is_simd() {
report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
@@ -448,22 +476,71 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
ret,
&|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() {
ty::Float(FloatTy::F32) => fx.lib_call(
- "roundf",
+ "__powisf2", // compiler-builtins
+ vec![AbiParam::new(types::F32), AbiParam::new(types::I32)],
vec![AbiParam::new(types::F32)],
- vec![AbiParam::new(types::F32)],
- &[lane],
+ &[lane, exp],
)[0],
ty::Float(FloatTy::F64) => fx.lib_call(
- "round",
- vec![AbiParam::new(types::F64)],
+ "__powidf2", // compiler-builtins
+ vec![AbiParam::new(types::F64), AbiParam::new(types::I32)],
vec![AbiParam::new(types::F64)],
- &[lane],
+ &[lane, exp],
)[0],
_ => unreachable!("{:?}", lane_ty),
},
);
}
+ sym::simd_fsin
+ | sym::simd_fcos
+ | sym::simd_fexp
+ | sym::simd_fexp2
+ | sym::simd_flog
+ | sym::simd_flog10
+ | sym::simd_flog2
+ | sym::simd_round => {
+ intrinsic_args!(fx, args => (a); intrinsic);
+
+ if !a.layout().ty.is_simd() {
+ report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+ return;
+ }
+
+ simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
+ let lane_ty = match lane_ty.kind() {
+ ty::Float(FloatTy::F32) => types::F32,
+ ty::Float(FloatTy::F64) => types::F64,
+ _ => unreachable!("{:?}", lane_ty),
+ };
+ let name = match (intrinsic, lane_ty) {
+ (sym::simd_fsin, types::F32) => "sinf",
+ (sym::simd_fsin, types::F64) => "sin",
+ (sym::simd_fcos, types::F32) => "cosf",
+ (sym::simd_fcos, types::F64) => "cos",
+ (sym::simd_fexp, types::F32) => "expf",
+ (sym::simd_fexp, types::F64) => "exp",
+ (sym::simd_fexp2, types::F32) => "exp2f",
+ (sym::simd_fexp2, types::F64) => "exp2",
+ (sym::simd_flog, types::F32) => "logf",
+ (sym::simd_flog, types::F64) => "log",
+ (sym::simd_flog10, types::F32) => "log10f",
+ (sym::simd_flog10, types::F64) => "log10",
+ (sym::simd_flog2, types::F32) => "log2f",
+ (sym::simd_flog2, types::F64) => "log2",
+ (sym::simd_round, types::F32) => "roundf",
+ (sym::simd_round, types::F64) => "round",
+ _ => unreachable!("{:?}", intrinsic),
+ };
+ fx.lib_call(
+ name,
+ vec![AbiParam::new(lane_ty)],
+ vec![AbiParam::new(lane_ty)],
+ &[lane],
+ )[0]
+ });
+ }
+
sym::simd_fabs | sym::simd_fsqrt | sym::simd_ceil | sym::simd_floor | sym::simd_trunc => {
intrinsic_args!(fx, args => (a); intrinsic);
@@ -488,7 +565,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
});
}
- sym::simd_reduce_add_ordered | sym::simd_reduce_add_unordered => {
+ sym::simd_reduce_add_ordered => {
intrinsic_args!(fx, args => (v, acc); intrinsic);
let acc = acc.load_scalar(fx);
@@ -507,7 +584,25 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
});
}
- sym::simd_reduce_mul_ordered | sym::simd_reduce_mul_unordered => {
+ sym::simd_reduce_add_unordered => {
+ intrinsic_args!(fx, args => (v); intrinsic);
+
+ // FIXME there must be no acc param for integer vectors
+ if !v.layout().ty.is_simd() {
+ report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+ return;
+ }
+
+ simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
+ if lane_ty.is_floating_point() {
+ fx.bcx.ins().fadd(a, b)
+ } else {
+ fx.bcx.ins().iadd(a, b)
+ }
+ });
+ }
+
+ sym::simd_reduce_mul_ordered => {
intrinsic_args!(fx, args => (v, acc); intrinsic);
let acc = acc.load_scalar(fx);
@@ -526,6 +621,24 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
});
}
+ sym::simd_reduce_mul_unordered => {
+ intrinsic_args!(fx, args => (v); intrinsic);
+
+ // FIXME there must be no acc param for integer vectors
+ if !v.layout().ty.is_simd() {
+ report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+ return;
+ }
+
+ simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
+ if lane_ty.is_floating_point() {
+ fx.bcx.ins().fmul(a, b)
+ } else {
+ fx.bcx.ins().imul(a, b)
+ }
+ });
+ }
+
sym::simd_reduce_all => {
intrinsic_args!(fx, args => (v); intrinsic);
@@ -581,7 +694,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
}
- sym::simd_reduce_min => {
+ sym::simd_reduce_min | sym::simd_reduce_min_nanless => {
intrinsic_args!(fx, args => (v); intrinsic);
if !v.layout().ty.is_simd() {
@@ -600,7 +713,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
});
}
- sym::simd_reduce_max => {
+ sym::simd_reduce_max | sym::simd_reduce_max_nanless => {
intrinsic_args!(fx, args => (v); intrinsic);
if !v.layout().ty.is_simd() {
@@ -878,6 +991,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
fx.tcx.sess.span_err(span, format!("Unknown SIMD intrinsic {}", intrinsic));
// Prevent verifier error
fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+ return;
}
}
let ret_block = fx.get_block(target);
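
Note: the `simd_fpowi` lowering above calls the compiler-builtins symbols `__powisf2`/`__powidf2` once per lane. A scalar sketch of what such a float-to-integer-power routine computes, assuming the usual repeated-squaring approach (the name `powi_model` is illustrative, not part of the patch or of compiler-builtins):

fn powi_model(base: f32, exp: i32) -> f32 {
    // Square-and-multiply: consume the exponent bit by bit, squaring
    // the base each step; a negative exponent inverts the result.
    let mut b = base;
    let mut n = exp.unsigned_abs();
    let mut acc = 1.0f32;
    while n > 0 {
        if n & 1 == 1 {
            acc *= b;
        }
        b *= b;
        n >>= 1;
    }
    if exp < 0 { 1.0 / acc } else { acc }
}

The per-lane `lib_call` in the handler passes the same scalar `exp` to every lane, matching `simd_fpowi`'s signature of one integer exponent applied across the whole vector.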