diff options
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs')
-rw-r--r-- | compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs | 68 |
1 files changed, 58 insertions, 10 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index ea137c4ca..fe4f073f7 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -1,7 +1,6 @@ //! Codegen `extern "platform-intrinsic"` intrinsics. -use rustc_middle::ty::GenericArgsRef; -use rustc_span::Symbol; +use cranelift_codegen::ir::immediates::Offset32; use rustc_target::abi::Endian; use super::*; @@ -282,11 +281,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant"); }; - let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + let idx: u32 = idx_const + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count), @@ -331,10 +330,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }; let idx = idx_const - .try_to_bits(Size::from_bytes(4 /* u32*/)) - .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const)); + .try_to_u32() + .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const)); let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx); - if idx >= lane_count.into() { + if u64::from(idx) >= lane_count { fx.tcx.sess.span_fatal( fx.mir.span, format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count), @@ -1008,8 +1007,57 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( } } + sym::simd_masked_load => { + intrinsic_args!(fx, args => (mask, ptr, val); intrinsic); + + let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); + let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(val_lane_count, mask_lane_count); + assert_eq!(val_lane_count, ret_lane_count); + + let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap(); + let ret_lane_layout = fx.layout_of(ret_lane_ty); + let ptr_val = ptr.load_scalar(fx); + + for lane_idx in 0..ret_lane_count { + let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx); + let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx); + + let if_enabled = fx.bcx.create_block(); + let if_disabled = fx.bcx.create_block(); + let next = fx.bcx.create_block(); + let res_lane = fx.bcx.append_block_param(next, lane_clif_ty); + + fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]); + fx.bcx.seal_block(if_enabled); + fx.bcx.seal_block(if_disabled); + + fx.bcx.switch_to_block(if_enabled); + let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32; + let res = fx.bcx.ins().load( + lane_clif_ty, + MemFlags::trusted(), + ptr_val, + Offset32::new(offset), + ); + fx.bcx.ins().jump(next, &[res]); + + fx.bcx.switch_to_block(if_disabled); + fx.bcx.ins().jump(next, &[val_lane]); + + fx.bcx.seal_block(next); + fx.bcx.switch_to_block(next); + + fx.bcx.ins().nop(); + + ret.place_lane(fx, lane_idx) + .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout)); + } + } + sym::simd_scatter => { - intrinsic_args!(fx, args => (val, ptr, mask); intrinsic); + intrinsic_args!(fx, args => (mask, ptr, val); intrinsic); let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx); let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx); |