Diffstat (limited to 'compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs')
 compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs | 68
 1 file changed, 58 insertions(+), 10 deletions(-)
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index ea137c4ca..fe4f073f7 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -1,7 +1,6 @@
//! Codegen `extern "platform-intrinsic"` intrinsics.
-use rustc_middle::ty::GenericArgsRef;
-use rustc_span::Symbol;
+use cranelift_codegen::ir::immediates::Offset32;
use rustc_target::abi::Endian;
use super::*;
@@ -282,11 +281,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant");
};
- let idx = idx_const
- .try_to_bits(Size::from_bytes(4 /* u32*/))
- .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
+ let idx: u32 = idx_const
+ .try_to_u32()
+ .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx);
- if idx >= lane_count.into() {
+ if u64::from(idx) >= lane_count {
fx.tcx.sess.span_fatal(
fx.mir.span,
format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count),
@@ -331,10 +330,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
};
let idx = idx_const
- .try_to_bits(Size::from_bytes(4 /* u32*/))
- .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
+ .try_to_u32()
+ .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx);
- if idx >= lane_count.into() {
+ if u64::from(idx) >= lane_count {
fx.tcx.sess.span_fatal(
fx.mir.span,
format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count),
@@ -1008,8 +1007,57 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
}
}
+ sym::simd_masked_load => {
+ intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
+
+ let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+ let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+ let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+ assert_eq!(val_lane_count, mask_lane_count);
+ assert_eq!(val_lane_count, ret_lane_count);
+
+ let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+ let ret_lane_layout = fx.layout_of(ret_lane_ty);
+ let ptr_val = ptr.load_scalar(fx);
+
+ for lane_idx in 0..ret_lane_count {
+ let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+ let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+ let if_enabled = fx.bcx.create_block();
+ let if_disabled = fx.bcx.create_block();
+ let next = fx.bcx.create_block();
+ let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+ fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
+ fx.bcx.seal_block(if_enabled);
+ fx.bcx.seal_block(if_disabled);
+
+ fx.bcx.switch_to_block(if_enabled);
+ let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32;
+ let res = fx.bcx.ins().load(
+ lane_clif_ty,
+ MemFlags::trusted(),
+ ptr_val,
+ Offset32::new(offset),
+ );
+ fx.bcx.ins().jump(next, &[res]);
+
+ fx.bcx.switch_to_block(if_disabled);
+ fx.bcx.ins().jump(next, &[val_lane]);
+
+ fx.bcx.seal_block(next);
+ fx.bcx.switch_to_block(next);
+
+ fx.bcx.ins().nop();
+
+ ret.place_lane(fx, lane_idx)
+ .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+ }
+ }
+
sym::simd_scatter => {
- intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+ intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);