diff options
Diffstat (limited to '')
4 files changed, 229 insertions, 77 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/cpuid.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/cpuid.rs index d02dfd93c..5120b89c4 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/cpuid.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/cpuid.rs @@ -62,7 +62,7 @@ pub(crate) fn codegen_cpuid_call<'tcx>( fx.bcx.ins().jump(dest, &[zero, zero, proc_info_ecx, proc_info_edx]); fx.bcx.switch_to_block(unsupported_leaf); - crate::trap::trap_unreachable( + crate::trap::trap_unimplemented( fx, "__cpuid_count arch intrinsic doesn't yet support specified leaf", ); diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs index 869670c8c..783d426c3 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs @@ -14,6 +14,10 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( target: Option<BasicBlock>, ) { match intrinsic { + "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => { + // Spin loop hint + } + // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` "llvm.x86.sse2.pmovmskb.128" | "llvm.x86.avx2.pmovmskb" | "llvm.x86.sse2.movmsk.pd" => { intrinsic_args!(fx, args => (a); intrinsic); @@ -25,8 +29,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( let mut res = fx.bcx.ins().iconst(types::I32, 0); for lane in (0..lane_count).rev() { - let a_lane = - a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx); + let a_lane = a.value_lane(fx, lane).load_scalar(fx); // cast float to int let a_lane = match lane_ty { @@ -139,6 +142,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( .sess .warn(&format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic)); crate::trap::trap_unimplemented(fx, intrinsic); + return; } } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index b2a83e1d4..0302b843a 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -44,7 +44,7 @@ fn report_atomic_type_validation_error<'tcx>( ), ); // Prevent verifier error - crate::trap::trap_unreachable(fx, "compilation should not have succeeded"); + fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); } pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Option<Type> { @@ -53,7 +53,7 @@ pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx _ => unreachable!(), }; - match scalar_to_clif_type(tcx, element).by(u16::try_from(count).unwrap()) { + match scalar_to_clif_type(tcx, element).by(u32::try_from(count).unwrap()) { // Cranelift currently only implements icmp for 128bit vectors. Some(vector_ty) if vector_ty.bits() == 128 => Some(vector_ty), _ => None, @@ -84,6 +84,30 @@ fn simd_for_each_lane<'tcx>( } } +fn simd_pair_for_each_lane_typed<'tcx>( + fx: &mut FunctionCx<'_, '_, 'tcx>, + x: CValue<'tcx>, + y: CValue<'tcx>, + ret: CPlace<'tcx>, + f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, CValue<'tcx>, CValue<'tcx>) -> CValue<'tcx>, +) { + assert_eq!(x.layout(), y.layout()); + let layout = x.layout(); + + let (lane_count, _lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, _ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_count, ret_lane_count); + + for lane_idx in 0..lane_count { + let x_lane = x.value_lane(fx, lane_idx); + let y_lane = y.value_lane(fx, lane_idx); + + let res_lane = f(fx, x_lane, y_lane); + + ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane); + } +} + fn simd_pair_for_each_lane<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, x: CValue<'tcx>, @@ -203,7 +227,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>( sym::transmute => { crate::base::codegen_panic(fx, "Transmuting to uninhabited type.", source_info); } - _ => unimplemented!("unsupported instrinsic {}", intrinsic), + _ => unimplemented!("unsupported intrinsic {}", intrinsic), } return; }; @@ -301,7 +325,44 @@ fn codegen_float_intrinsic_call<'tcx>( _ => unreachable!(), }; - let res = fx.easy_call(name, &args, ty); + let layout = fx.layout_of(ty); + let res = match intrinsic { + sym::fmaf32 | sym::fmaf64 => { + let a = args[0].load_scalar(fx); + let b = args[1].load_scalar(fx); + let c = args[2].load_scalar(fx); + CValue::by_val(fx.bcx.ins().fma(a, b, c), layout) + } + sym::copysignf32 | sym::copysignf64 => { + let a = args[0].load_scalar(fx); + let b = args[1].load_scalar(fx); + CValue::by_val(fx.bcx.ins().fcopysign(a, b), layout) + } + sym::fabsf32 + | sym::fabsf64 + | sym::floorf32 + | sym::floorf64 + | sym::ceilf32 + | sym::ceilf64 + | sym::truncf32 + | sym::truncf64 => { + let a = args[0].load_scalar(fx); + + let val = match intrinsic { + sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(a), + sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(a), + sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(a), + sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(a), + _ => unreachable!(), + }; + + CValue::by_val(val, layout) + } + // These intrinsics aren't supported natively by Cranelift. + // Lower them to a libcall. + _ => fx.easy_call(name, &args, ty), + }; + ret.write_cvalue(fx, res); true @@ -320,9 +381,6 @@ fn codegen_regular_intrinsic_call<'tcx>( let usize_layout = fx.layout_of(fx.tcx.types.usize); match intrinsic { - sym::assume => { - intrinsic_args!(fx, args => (_a); intrinsic); - } sym::likely | sym::unlikely => { intrinsic_args!(fx, args => (a); intrinsic); @@ -470,37 +528,7 @@ fn codegen_regular_intrinsic_call<'tcx>( _ => unreachable!(), }; - let signed = type_sign(lhs.layout().ty); - - let checked_res = crate::num::codegen_checked_int_binop(fx, bin_op, lhs, rhs); - - let (val, has_overflow) = checked_res.load_scalar_pair(fx); - let clif_ty = fx.clif_type(lhs.layout().ty).unwrap(); - - let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed); - - let val = match (intrinsic, signed) { - (sym::saturating_add, false) => fx.bcx.ins().select(has_overflow, max, val), - (sym::saturating_sub, false) => fx.bcx.ins().select(has_overflow, min, val), - (sym::saturating_add, true) => { - let rhs = rhs.load_scalar(fx); - let rhs_ge_zero = - fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0); - let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min); - fx.bcx.ins().select(has_overflow, sat_val, val) - } - (sym::saturating_sub, true) => { - let rhs = rhs.load_scalar(fx); - let rhs_ge_zero = - fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0); - let sat_val = fx.bcx.ins().select(rhs_ge_zero, min, max); - fx.bcx.ins().select(has_overflow, sat_val, val) - } - _ => unreachable!(), - }; - - let res = CValue::by_val(val, lhs.layout()); - + let res = crate::num::codegen_saturating_int_binop(fx, bin_op, lhs, rhs); ret.write_cvalue(fx, res); } sym::rotate_left => { @@ -540,6 +568,13 @@ fn codegen_regular_intrinsic_call<'tcx>( ret.write_cvalue(fx, CValue::by_val(res, base.layout())); } + sym::ptr_mask => { + intrinsic_args!(fx, args => (ptr, mask); intrinsic); + let ptr = ptr.load_scalar(fx); + let mask = mask.load_scalar(fx); + fx.bcx.ins().band(ptr, mask); + } + sym::transmute => { intrinsic_args!(fx, args => (from); intrinsic); @@ -775,18 +810,11 @@ fn codegen_regular_intrinsic_call<'tcx>( ret.write_cvalue(fx, val); } - sym::ptr_guaranteed_eq => { + sym::ptr_guaranteed_cmp => { intrinsic_args!(fx, args => (a, b); intrinsic); - let val = crate::num::codegen_ptr_binop(fx, BinOp::Eq, a, b); - ret.write_cvalue(fx, val); - } - - sym::ptr_guaranteed_ne => { - intrinsic_args!(fx, args => (a, b); intrinsic); - - let val = crate::num::codegen_ptr_binop(fx, BinOp::Ne, a, b); - ret.write_cvalue(fx, val); + let val = crate::num::codegen_ptr_binop(fx, BinOp::Eq, a, b).load_scalar(fx); + ret.write_cvalue(fx, CValue::by_val(val, fx.layout_of(fx.tcx.types.u8))); } sym::caller_location => { @@ -818,8 +846,6 @@ fn codegen_regular_intrinsic_call<'tcx>( if fx.tcx.is_compiler_builtins(LOCAL_CRATE) { // special case for compiler-builtins to avoid having to patch it crate::trap::trap_unimplemented(fx, "128bit atomics not yet supported"); - let ret_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(ret_block, &[]); return; } else { fx.tcx @@ -851,8 +877,6 @@ fn codegen_regular_intrinsic_call<'tcx>( if fx.tcx.is_compiler_builtins(LOCAL_CRATE) { // special case for compiler-builtins to avoid having to patch it crate::trap::trap_unimplemented(fx, "128bit atomics not yet supported"); - let ret_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(ret_block, &[]); return; } else { fx.tcx @@ -1176,7 +1200,7 @@ fn codegen_regular_intrinsic_call<'tcx>( // FIXME once unwinding is supported, change this to actually catch panics let f_sig = fx.bcx.func.import_signature(Signature { call_conv: fx.target_config.default_call_conv, - params: vec![AbiParam::new(fx.bcx.func.dfg.value_type(data))], + params: vec![AbiParam::new(pointer_ty(fx.tcx))], returns: vec![], }); diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index 30e3d1125..51fce8c85 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -2,6 +2,7 @@ use rustc_middle::ty::subst::SubstsRef; use rustc_span::Symbol; +use rustc_target::abi::Endian; use super::*; use crate::prelude::*; @@ -14,7 +15,7 @@ fn report_simd_type_validation_error( ) { fx.tcx.sess.span_err(span, &format!("invalid monomorphization of `{}` intrinsic: expected SIMD input type, found non-SIMD `{}`", intrinsic, ty)); // Prevent verifier error - crate::trap::trap_unreachable(fx, "compilation should not have succeeded"); + fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); } pub(super) fn codegen_simd_intrinsic_call<'tcx>( @@ -26,7 +27,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( span: Span, ) { match intrinsic { - sym::simd_cast => { + sym::simd_as | sym::simd_cast => { intrinsic_args!(fx, args => (a); intrinsic); if !a.layout().ty.is_simd() { @@ -157,11 +158,12 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( ), ); // Prevent verifier error - crate::trap::trap_unreachable(fx, "compilation should not have succeeded"); + fx.bcx.ins().trap(TrapCode::UnreachableCodeReached); return; } } } else { + // FIXME remove this case intrinsic.as_str()["simd_shuffle".len()..].parse().unwrap() }; @@ -186,7 +188,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let size = Size::from_bytes( 4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */ ); - alloc.inner().get_bytes(fx, alloc_range(offset, size)).unwrap() + alloc + .inner() + .get_bytes_strip_provenance(fx, alloc_range(offset, size)) + .unwrap() } _ => unreachable!("{:?}", idx_const), }; @@ -274,12 +279,17 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( idx_const } else { fx.tcx.sess.span_warn(span, "Index argument for `simd_extract` is not a constant"); - let res = crate::trap::trap_unimplemented_ret_value( + let trap_block = fx.bcx.create_block(); + let dummy_block = fx.bcx.create_block(); + let true_ = fx.bcx.ins().iconst(types::I8, 1); + fx.bcx.ins().brnz(true_, trap_block, &[]); + fx.bcx.ins().jump(dummy_block, &[]); + fx.bcx.switch_to_block(trap_block); + crate::trap::trap_unimplemented( fx, - ret.layout(), "Index argument for `simd_extract` is not a constant", ); - ret.write_cvalue(fx, res); + fx.bcx.switch_to_block(dummy_block); return; }; @@ -392,21 +402,15 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let layout = a.layout(); let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let res_lane_layout = fx.layout_of(lane_ty); for lane in 0..lane_count { - let a_lane = a.value_lane(fx, lane); - let b_lane = b.value_lane(fx, lane); - let c_lane = c.value_lane(fx, lane); + let a_lane = a.value_lane(fx, lane).load_scalar(fx); + let b_lane = b.value_lane(fx, lane).load_scalar(fx); + let c_lane = c.value_lane(fx, lane).load_scalar(fx); - let res_lane = match lane_ty.kind() { - ty::Float(FloatTy::F32) => { - fx.easy_call("fmaf", &[a_lane, b_lane, c_lane], lane_ty) - } - ty::Float(FloatTy::F64) => { - fx.easy_call("fma", &[a_lane, b_lane, c_lane], lane_ty) - } - _ => unreachable!(), - }; + let res_lane = fx.bcx.ins().fma(a_lane, b_lane, c_lane); + let res_lane = CValue::by_val(res_lane, res_lane_layout); ret.place_lane(fx, lane).write_cvalue(fx, res_lane); } @@ -648,8 +652,128 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( } } - // simd_saturating_* - // simd_bitmask + sym::simd_select_bitmask => { + intrinsic_args!(fx, args => (m, a, b); intrinsic); + + if !a.layout().ty.is_simd() { + report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty); + return; + } + assert_eq!(a.layout(), b.layout()); + + let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); + let lane_layout = fx.layout_of(lane_ty); + + let m = m.load_scalar(fx); + + for lane in 0..lane_count { + let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64); + let m_lane = fx.bcx.ins().band_imm(m_lane, 1); + let a_lane = a.value_lane(fx, lane).load_scalar(fx); + let b_lane = b.value_lane(fx, lane).load_scalar(fx); + + let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0); + let res_lane = + CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout); + + ret.place_lane(fx, lane).write_cvalue(fx, res_lane); + } + } + + sym::simd_bitmask => { + intrinsic_args!(fx, args => (a); intrinsic); + + let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); + let lane_clif_ty = fx.clif_type(lane_ty).unwrap(); + + // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a + // vector mask and returns the most significant bit (MSB) of each lane in the form + // of either: + // * an unsigned integer + // * an array of `u8` + // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits. + // + // The bit order of the result depends on the byte endianness, LSB-first for little + // endian and MSB-first for big endian. + let expected_int_bits = lane_count.max(8); + let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64); + + match lane_ty.kind() { + ty::Int(_) | ty::Uint(_) => {} + _ => { + fx.tcx.sess.span_fatal( + span, + &format!( + "invalid monomorphization of `simd_bitmask` intrinsic: \ + vector argument `{}`'s element type `{}`, expected integer element \ + type", + a.layout().ty, + lane_ty + ), + ); + } + } + + let res_type = + Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap(); + let mut res = fx.bcx.ins().iconst(res_type, 0); + + let lanes = match fx.tcx.sess.target.endian { + Endian::Big => Box::new(0..lane_count) as Box<dyn Iterator<Item = u64>>, + Endian::Little => Box::new((0..lane_count).rev()) as Box<dyn Iterator<Item = u64>>, + }; + for lane in lanes { + let a_lane = a.value_lane(fx, lane).load_scalar(fx); + + // extract sign bit of an int + let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_clif_ty.bits() - 1)); + + // shift sign bit into result + let a_lane_sign = clif_intcast(fx, a_lane_sign, res_type, false); + res = fx.bcx.ins().ishl_imm(res, 1); + res = fx.bcx.ins().bor(res, a_lane_sign); + } + + match ret.layout().ty.kind() { + ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {} + ty::Array(elem, len) + if matches!(elem.kind(), ty::Uint(ty::UintTy::U8)) + && len.try_eval_usize(fx.tcx, ty::ParamEnv::reveal_all()) + == Some(expected_bytes) => {} + _ => { + fx.tcx.sess.span_fatal( + span, + &format!( + "invalid monomorphization of `simd_bitmask` intrinsic: \ + cannot return `{}`, expected `u{}` or `[u8; {}]`", + ret.layout().ty, + expected_int_bits, + expected_bytes + ), + ); + } + } + + let res = CValue::by_val(res, ret.layout()); + ret.write_cvalue(fx, res); + } + + sym::simd_saturating_add | sym::simd_saturating_sub => { + intrinsic_args!(fx, args => (x, y); intrinsic); + + let bin_op = match intrinsic { + sym::simd_saturating_add => BinOp::Add, + sym::simd_saturating_sub => BinOp::Sub, + _ => unreachable!(), + }; + + // FIXME use vector instructions when possible + simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| { + crate::num::codegen_saturating_int_binop(fx, bin_op, x_lane, y_lane) + }); + } + + // simd_arith_offset // simd_scatter // simd_gather _ => { |