author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-06-19 09:26:03 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-06-19 09:26:03 +0000
commit     9918693037dce8aa4bb6f08741b6812923486c18 (patch)
tree       21d2b40bec7e6a7ea664acee056eb3d08e15a1cf /compiler/rustc_codegen_cranelift/src
parent     Releasing progress-linux version 1.75.0+dfsg1-5~progress7.99u1. (diff)
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/comments.rs | 1
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/mod.rs | 3
-rw-r--r--  compiler/rustc_codegen_cranelift/src/analyze.rs | 1
-rw-r--r--  compiler/rustc_codegen_cranelift/src/base.rs | 22
-rw-r--r--  compiler/rustc_codegen_cranelift/src/common.rs | 14
-rw-r--r--  compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs | 4
-rw-r--r--  compiler/rustc_codegen_cranelift/src/constant.rs | 52
-rw-r--r--  compiler/rustc_codegen_cranelift/src/driver/aot.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/inline_asm.rs | 167
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs | 4
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs | 715
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs | 20
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs | 68
-rw-r--r--  compiler/rustc_codegen_cranelift/src/pretty_clif.rs | 21
-rw-r--r--  compiler/rustc_codegen_cranelift/src/unsize.rs | 114
-rw-r--r--  compiler/rustc_codegen_cranelift/src/value_and_place.rs | 93
17 files changed, 995 insertions, 308 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/abi/comments.rs b/compiler/rustc_codegen_cranelift/src/abi/comments.rs
index ade6968de..a318cae17 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/comments.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/comments.rs
@@ -3,7 +3,6 @@
use std::borrow::Cow;
-use rustc_middle::mir;
use rustc_target::abi::call::PassMode;
use crate::prelude::*;
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index c4572e035..2c194f6d6 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -6,7 +6,7 @@ mod returning;
use std::borrow::Cow;
-use cranelift_codegen::ir::{AbiParam, SigRef};
+use cranelift_codegen::ir::SigRef;
use cranelift_module::ModuleError;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::ty::layout::FnAbiOf;
@@ -383,6 +383,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
args,
ret_place,
target,
+ source_info.span,
);
return;
}
diff --git a/compiler/rustc_codegen_cranelift/src/analyze.rs b/compiler/rustc_codegen_cranelift/src/analyze.rs
index 321612238..c5762638a 100644
--- a/compiler/rustc_codegen_cranelift/src/analyze.rs
+++ b/compiler/rustc_codegen_cranelift/src/analyze.rs
@@ -2,7 +2,6 @@
use rustc_index::IndexVec;
use rustc_middle::mir::StatementKind::*;
-use rustc_middle::ty::Ty;
use crate::prelude::*;
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 91b1547cb..df40a5eb4 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -176,10 +176,10 @@ pub(crate) fn compile_fn(
match module.define_function(codegened_func.func_id, context) {
Ok(()) => {}
Err(ModuleError::Compilation(CodegenError::ImplLimitExceeded)) => {
- let handler = rustc_session::EarlyErrorHandler::new(
+ let early_dcx = rustc_session::EarlyDiagCtxt::new(
rustc_session::config::ErrorOutputType::default(),
);
- handler.early_error(format!(
+ early_dcx.early_error(format!(
"backend implementation limit exceeded while compiling {name}",
name = codegened_func.symbol_name
));
@@ -353,7 +353,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
fx,
rustc_hir::LangItem::PanicBoundsCheck,
&[index, len, location],
- source_info.span,
+ Some(source_info.span),
);
}
AssertKind::MisalignedPointerDereference { ref required, ref found } => {
@@ -365,7 +365,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
fx,
rustc_hir::LangItem::PanicMisalignedPointerDereference,
&[required, found, location],
- source_info.span,
+ Some(source_info.span),
);
}
_ => {
@@ -456,7 +456,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
);
}
- crate::inline_asm::codegen_inline_asm(
+ crate::inline_asm::codegen_inline_asm_terminator(
fx,
source_info.span,
template,
@@ -945,19 +945,19 @@ pub(crate) fn codegen_panic<'tcx>(
let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap());
let args = [msg_ptr, msg_len, location];
- codegen_panic_inner(fx, rustc_hir::LangItem::Panic, &args, source_info.span);
+ codegen_panic_inner(fx, rustc_hir::LangItem::Panic, &args, Some(source_info.span));
}
pub(crate) fn codegen_panic_nounwind<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
msg_str: &str,
- source_info: mir::SourceInfo,
+ span: Option<Span>,
) {
let msg_ptr = fx.anonymous_str(msg_str);
let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap());
let args = [msg_ptr, msg_len];
- codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, source_info.span);
+ codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, span);
}
pub(crate) fn codegen_unwind_terminate<'tcx>(
@@ -967,16 +967,16 @@ pub(crate) fn codegen_unwind_terminate<'tcx>(
) {
let args = [];
- codegen_panic_inner(fx, reason.lang_item(), &args, source_info.span);
+ codegen_panic_inner(fx, reason.lang_item(), &args, Some(source_info.span));
}
fn codegen_panic_inner<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
lang_item: rustc_hir::LangItem,
args: &[Value],
- span: Span,
+ span: Option<Span>,
) {
- let def_id = fx.tcx.require_lang_item(lang_item, Some(span));
+ let def_id = fx.tcx.require_lang_item(lang_item, span);
let instance = Instance::mono(fx.tcx, def_id).polymorphize(fx.tcx);
let symbol_name = fx.tcx.symbol_name(instance).name;
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index 63562d335..bd19a7ed0 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -98,11 +98,15 @@ fn clif_pair_type_from_ty<'tcx>(
/// Is a pointer to this type a fat ptr?
pub(crate) fn has_ptr_meta<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> bool {
- let ptr_ty = Ty::new_ptr(tcx, TypeAndMut { ty, mutbl: rustc_hir::Mutability::Not });
- match &tcx.layout_of(ParamEnv::reveal_all().and(ptr_ty)).unwrap().abi {
- Abi::Scalar(_) => false,
- Abi::ScalarPair(_, _) => true,
- abi => unreachable!("Abi of ptr to {:?} is {:?}???", ty, abi),
+ if ty.is_sized(tcx, ParamEnv::reveal_all()) {
+ return false;
+ }
+
+ let tail = tcx.struct_tail_erasing_lifetimes(ty, ParamEnv::reveal_all());
+ match tail.kind() {
+ ty::Foreign(..) => false,
+ ty::Str | ty::Slice(..) | ty::Dynamic(..) => true,
+ _ => bug!("unexpected unsized tail: {:?}", tail),
}
}
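
Note on the hunk above: the rewritten has_ptr_meta no longer asks the layout code for the ABI of a raw pointer to the type. It first checks whether the pointee is sized and, if not, inspects its unsized tail: slices, str and trait objects need metadata (a length or a vtable), while extern types are unsized yet still thin. A standalone sketch of that rule, using simplified stand-in types rather than the rustc_middle API:

    // Hypothetical stand-in for the unsized-tail kinds the real code matches on.
    enum TailKind {
        Sized,   // statically sized: pointers are thin
        Foreign, // extern type: unsized, but pointers carry no metadata
        Str,     // &str carries a length
        Slice,   // &[T] carries a length
        Dynamic, // &dyn Trait carries a vtable pointer
    }

    fn has_ptr_meta(tail: TailKind) -> bool {
        match tail {
            TailKind::Sized | TailKind::Foreign => false,
            TailKind::Str | TailKind::Slice | TailKind::Dynamic => true,
        }
    }

    fn main() {
        assert!(!has_ptr_meta(TailKind::Sized));
        assert!(!has_ptr_meta(TailKind::Foreign));
        assert!(has_ptr_meta(TailKind::Slice));
        assert!(has_ptr_meta(TailKind::Dynamic));
    }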
diff --git a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
index 20f2ee4c7..967896913 100644
--- a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
+++ b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
@@ -46,7 +46,7 @@ impl ConcurrencyLimiter {
}
}
- pub(super) fn acquire(&mut self, handler: &rustc_errors::Handler) -> ConcurrencyLimiterToken {
+ pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken {
let mut state = self.state.lock().unwrap();
loop {
state.assert_invariants();
@@ -64,7 +64,7 @@ impl ConcurrencyLimiter {
// Make sure to drop the mutex guard first to prevent poisoning the mutex.
drop(state);
if let Some(err) = err {
- handler.fatal(err).raise();
+ dcx.fatal(err);
} else {
// The error was already emitted, but compilation continued. Raise a silent
// fatal error.
diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs
index b0853d30e..9ffa006e5 100644
--- a/compiler/rustc_codegen_cranelift/src/constant.rs
+++ b/compiler/rustc_codegen_cranelift/src/constant.rs
@@ -1,10 +1,12 @@
//! Handling of `static`s, `const`s and promoted allocations
+use std::cmp::Ordering;
+
use cranelift_module::*;
-use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_data_structures::fx::FxHashSet;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar};
-use rustc_middle::mir::ConstValue;
+use rustc_middle::ty::ScalarInt;
use crate::prelude::*;
@@ -123,7 +125,8 @@ pub(crate) fn codegen_const_value<'tcx>(
}
}
Scalar::Ptr(ptr, _size) => {
- let (alloc_id, offset) = ptr.into_parts(); // we know the `offset` is relative
+ let (prov, offset) = ptr.into_parts(); // we know the `offset` is relative
+ let alloc_id = prov.alloc_id();
let base_addr = match fx.tcx.global_alloc(alloc_id) {
GlobalAlloc::Memory(alloc) => {
let data_id = data_id_for_alloc_id(
@@ -371,7 +374,8 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
data.define(bytes.into_boxed_slice());
- for &(offset, alloc_id) in alloc.provenance().ptrs().iter() {
+ for &(offset, prov) in alloc.provenance().ptrs().iter() {
+ let alloc_id = prov.alloc_id();
let addend = {
let endianness = tcx.data_layout.endian;
let offset = offset.bytes() as usize;
@@ -430,9 +434,9 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
pub(crate) fn mir_operand_get_const_val<'tcx>(
fx: &FunctionCx<'_, '_, 'tcx>,
operand: &Operand<'tcx>,
-) -> Option<ConstValue<'tcx>> {
+) -> Option<ScalarInt> {
match operand {
- Operand::Constant(const_) => Some(eval_mir_constant(fx, const_).0),
+ Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(),
// FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored
// inside a temporary before being passed to the intrinsic requiring the const argument.
// This code tries to find a single constant defining definition of the referenced local.
@@ -440,7 +444,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
if !place.projection.is_empty() {
return None;
}
- let mut computed_const_val = None;
+ let mut computed_scalar_int = None;
for bb_data in fx.mir.basic_blocks.iter() {
for stmt in &bb_data.statements {
match &stmt.kind {
@@ -456,22 +460,38 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
operand,
ty,
) => {
- if computed_const_val.is_some() {
+ if computed_scalar_int.is_some() {
return None; // local assigned twice
}
if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) {
return None;
}
- let const_val = mir_operand_get_const_val(fx, operand)?;
- if fx.layout_of(*ty).size
- != const_val.try_to_scalar_int()?.size()
+ let scalar_int = mir_operand_get_const_val(fx, operand)?;
+ let scalar_int = match fx
+ .layout_of(*ty)
+ .size
+ .cmp(&scalar_int.size())
{
- return None;
- }
- computed_const_val = Some(const_val);
+ Ordering::Equal => scalar_int,
+ Ordering::Less => match ty.kind() {
+ ty::Uint(_) => ScalarInt::try_from_uint(
+ scalar_int.try_to_uint(scalar_int.size()).unwrap(),
+ fx.layout_of(*ty).size,
+ )
+ .unwrap(),
+ ty::Int(_) => ScalarInt::try_from_int(
+ scalar_int.try_to_int(scalar_int.size()).unwrap(),
+ fx.layout_of(*ty).size,
+ )
+ .unwrap(),
+ _ => unreachable!(),
+ },
+ Ordering::Greater => return None,
+ };
+ computed_scalar_int = Some(scalar_int);
}
Rvalue::Use(operand) => {
- computed_const_val = mir_operand_get_const_val(fx, operand)
+ computed_scalar_int = mir_operand_get_const_val(fx, operand)
}
_ => return None,
}
@@ -522,7 +542,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
TerminatorKind::Call { .. } => {}
}
}
- computed_const_val
+ computed_scalar_int
}
}
}
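
Note on the hunk above: mir_operand_get_const_val now returns a ScalarInt instead of a ConstValue, and when the constant reaches the intrinsic through an IntToInt cast to a narrower type it re-sizes the value to the cast target's width instead of bailing out (widening casts still return None). A rough model of that narrowing step, with plain integers standing in for ScalarInt and sizes given in bytes; the helper names are illustrative, the real code goes through ScalarInt::try_from_uint/try_from_int:

    // Narrow an unsigned constant to `to_bytes` bytes.
    fn shrink_unsigned(value: u128, to_bytes: u32) -> u128 {
        let bits = to_bytes * 8;
        if bits >= 128 { value } else { value & ((1u128 << bits) - 1) }
    }

    // Narrow a signed constant, keeping the sign of the low `to_bytes` bytes.
    fn shrink_signed(value: i128, to_bytes: u32) -> i128 {
        let bits = to_bytes * 8;
        if bits >= 128 { value } else { (value << (128 - bits)) >> (128 - bits) }
    }

    fn main() {
        assert_eq!(shrink_unsigned(0x1_0000_00ff, 1), 0xff); // IMM8 stored as u32, read back as u8
        assert_eq!(shrink_signed(-1, 4), -1);                // sign preserved across the narrowing
    }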
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index 11229dd42..b3ab533df 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -422,7 +422,7 @@ pub(crate) fn run_aot(
backend_config.clone(),
global_asm_config.clone(),
cgu.name(),
- concurrency_limiter.acquire(tcx.sess.diagnostic()),
+ concurrency_limiter.acquire(tcx.sess.dcx()),
),
module_codegen,
Some(rustc_middle::dep_graph::hash_result),
diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
index ce0eecca8..73f4bc7c1 100644
--- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs
+++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
@@ -3,14 +3,13 @@
use std::fmt::Write;
use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
-use rustc_middle::mir::InlineAsmOperand;
use rustc_span::sym;
use rustc_target::asm::*;
use target_lexicon::BinaryFormat;
use crate::prelude::*;
-enum CInlineAsmOperand<'tcx> {
+pub(crate) enum CInlineAsmOperand<'tcx> {
In {
reg: InlineAsmRegOrRegClass,
value: Value,
@@ -34,7 +33,7 @@ enum CInlineAsmOperand<'tcx> {
},
}
-pub(crate) fn codegen_inline_asm<'tcx>(
+pub(crate) fn codegen_inline_asm_terminator<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
span: Span,
template: &[InlineAsmTemplatePiece],
@@ -42,8 +41,6 @@ pub(crate) fn codegen_inline_asm<'tcx>(
options: InlineAsmOptions,
destination: Option<mir::BasicBlock>,
) {
- // FIXME add .eh_frame unwind info directives
-
// Used by panic_abort on Windows, but uses a syntax which only happens to work with
// asm!() by accident and breaks with the GNU assembler as well as global_asm!() for
// the LLVM backend.
@@ -135,15 +132,33 @@ pub(crate) fn codegen_inline_asm<'tcx>(
})
.collect::<Vec<_>>();
- let mut inputs = Vec::new();
- let mut outputs = Vec::new();
+ codegen_inline_asm_inner(fx, template, &operands, options);
+
+ match destination {
+ Some(destination) => {
+ let destination_block = fx.get_block(destination);
+ fx.bcx.ins().jump(destination_block, &[]);
+ }
+ None => {
+ fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+ }
+ }
+}
+
+pub(crate) fn codegen_inline_asm_inner<'tcx>(
+ fx: &mut FunctionCx<'_, '_, 'tcx>,
+ template: &[InlineAsmTemplatePiece],
+ operands: &[CInlineAsmOperand<'tcx>],
+ options: InlineAsmOptions,
+) {
+ // FIXME add .eh_frame unwind info directives
let mut asm_gen = InlineAssemblyGenerator {
tcx: fx.tcx,
arch: fx.tcx.sess.asm_arch.unwrap(),
enclosing_def_id: fx.instance.def_id(),
template,
- operands: &operands,
+ operands,
options,
registers: Vec::new(),
stack_slots_clobber: Vec::new(),
@@ -165,6 +180,8 @@ pub(crate) fn codegen_inline_asm<'tcx>(
let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
fx.cx.global_asm.push_str(&generated_asm);
+ let mut inputs = Vec::new();
+ let mut outputs = Vec::new();
for (i, operand) in operands.iter().enumerate() {
match operand {
CInlineAsmOperand::In { reg: _, value } => {
@@ -186,16 +203,6 @@ pub(crate) fn codegen_inline_asm<'tcx>(
}
call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
-
- match destination {
- Some(destination) => {
- let destination_block = fx.get_block(destination);
- fx.bcx.ins().jump(destination_block, &[]);
- }
- None => {
- fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
- }
- }
}
struct InlineAssemblyGenerator<'a, 'tcx> {
@@ -637,8 +644,21 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
) {
match arch {
InlineAsmArch::X86_64 => {
- write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
- reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+ match reg {
+ InlineAsmReg::X86(reg)
+ if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+ && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+ {
+ // rustc emits x0 rather than xmm0
+ write!(generated_asm, " movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
+ write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
+ .unwrap();
+ }
+ _ => {
+ write!(generated_asm, " mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
+ reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+ }
+ }
generated_asm.push('\n');
}
InlineAsmArch::AArch64 => {
@@ -663,8 +683,24 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
) {
match arch {
InlineAsmArch::X86_64 => {
- generated_asm.push_str(" mov ");
- reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+ match reg {
+ InlineAsmReg::X86(reg)
+ if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+ && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+ {
+ // rustc emits x0 rather than xmm0
+ write!(
+ generated_asm,
+ " movups xmm{}",
+ reg as u32 - X86InlineAsmReg::xmm0 as u32
+ )
+ .unwrap();
+ }
+ _ => {
+ generated_asm.push_str(" mov ");
+ reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
+ }
+ }
writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
}
InlineAsmArch::AArch64 => {
@@ -720,7 +756,12 @@ fn call_inline_asm<'tcx>(
fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
for (offset, place) in outputs {
- let ty = fx.clif_type(place.layout().ty).unwrap();
+ let ty = if place.layout().ty.is_simd() {
+ let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
+ fx.clif_type(lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
+ } else {
+ fx.clif_type(place.layout().ty).unwrap()
+ };
let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
fx,
ty,
@@ -729,83 +770,3 @@ fn call_inline_asm<'tcx>(
place.write_cvalue(fx, CValue::by_val(value, place.layout()));
}
}
-
-pub(crate) fn codegen_xgetbv<'tcx>(
- fx: &mut FunctionCx<'_, '_, 'tcx>,
- xcr_no: Value,
- ret: CPlace<'tcx>,
-) {
- // FIXME add .eh_frame unwind info directives
-
- let operands = vec![
- CInlineAsmOperand::In {
- reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
- value: xcr_no,
- },
- CInlineAsmOperand::Out {
- reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
- late: true,
- place: Some(ret),
- },
- CInlineAsmOperand::Out {
- reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
- late: true,
- place: None,
- },
- ];
- let options = InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM;
-
- let mut inputs = Vec::new();
- let mut outputs = Vec::new();
-
- let mut asm_gen = InlineAssemblyGenerator {
- tcx: fx.tcx,
- arch: fx.tcx.sess.asm_arch.unwrap(),
- enclosing_def_id: fx.instance.def_id(),
- template: &[InlineAsmTemplatePiece::String(
- "
- xgetbv
- // out = rdx << 32 | rax
- shl rdx, 32
- or rax, rdx
- "
- .to_string(),
- )],
- operands: &operands,
- options,
- registers: Vec::new(),
- stack_slots_clobber: Vec::new(),
- stack_slots_input: Vec::new(),
- stack_slots_output: Vec::new(),
- stack_slot_size: Size::from_bytes(0),
- };
- asm_gen.allocate_registers();
- asm_gen.allocate_stack_slots();
-
- let inline_asm_index = fx.cx.inline_asm_index.get();
- fx.cx.inline_asm_index.set(inline_asm_index + 1);
- let asm_name = format!(
- "__inline_asm_{}_n{}",
- fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
- inline_asm_index
- );
-
- let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
- fx.cx.global_asm.push_str(&generated_asm);
-
- for (i, operand) in operands.iter().enumerate() {
- match operand {
- CInlineAsmOperand::In { reg: _, value } => {
- inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value));
- }
- CInlineAsmOperand::Out { reg: _, late: _, place } => {
- if let Some(place) = place {
- outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place));
- }
- }
- _ => unreachable!(),
- }
- }
-
- call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
-}
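
Note on the hunk above: the old codegen_inline_asm is split in two. codegen_inline_asm_terminator keeps the MIR-level handling (operand lowering plus the final jump or trap), while codegen_inline_asm_inner does register allocation, wrapper generation and the call, so the LLVM-intrinsic emulation in llvm_x86.rs can reuse it with hand-built CInlineAsmOperand lists. The save/restore code also learns to spell SSE registers as xmmN and spill them with movups, because the generic register printer emits the short x0-style names. A rough, hypothetical model of that renaming (the real code compares X86InlineAsmReg enum values; the base index below is made up):

    // Returns Some("xmmN") for SSE registers, None when the generic mov/reg.emit()
    // path should be used. `xmm0_index` stands in for X86InlineAsmReg::xmm0 as u32.
    fn xmm_spill_name(reg_index: u32, xmm0_index: u32) -> Option<String> {
        if (xmm0_index..xmm0_index + 16).contains(&reg_index) {
            Some(format!("xmm{}", reg_index - xmm0_index))
        } else {
            None
        }
    }

    fn main() {
        assert_eq!(xmm_spill_name(20, 20).as_deref(), Some("xmm0"));
        assert_eq!(xmm_spill_name(35, 20).as_deref(), Some("xmm15"));
        assert_eq!(xmm_spill_name(3, 20), None);
    }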
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
index e9b7daf14..dbd5db875 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
@@ -1,7 +1,5 @@
//! Emulate LLVM intrinsics
-use rustc_middle::ty::GenericArgsRef;
-
use crate::intrinsics::*;
use crate::prelude::*;
@@ -12,6 +10,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
args: &[mir::Operand<'tcx>],
ret: CPlace<'tcx>,
target: Option<BasicBlock>,
+ span: Span,
) {
if intrinsic.starts_with("llvm.aarch64") {
return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
@@ -31,6 +30,7 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
args,
ret,
target,
+ span,
);
}
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
index ee098be1f..e1e514dca 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
@@ -1,7 +1,5 @@
//! Emulate AArch64 LLVM intrinsics
-use rustc_middle::ty::GenericArgsRef;
-
use crate::intrinsics::*;
use crate::prelude::*;
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 4c5360486..99bb5c4ea 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -1,7 +1,9 @@
//! Emulate x86 LLVM intrinsics
-use rustc_middle::ty::GenericArgsRef;
+use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_target::asm::*;
+use crate::inline_asm::{codegen_inline_asm_inner, CInlineAsmOperand};
use crate::intrinsics::*;
use crate::prelude::*;
@@ -12,19 +14,53 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
args: &[mir::Operand<'tcx>],
ret: CPlace<'tcx>,
target: Option<BasicBlock>,
+ span: Span,
) {
match intrinsic {
"llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
// Spin loop hint
}
+ "llvm.x86.avx.vzeroupper" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper&ig_expand=7218
+ // Do nothing. It is a perf hint anyway.
+ }
+
// Used by is_x86_feature_detected!();
"llvm.x86.xgetbv" => {
intrinsic_args!(fx, args => (xcr_no); intrinsic);
let xcr_no = xcr_no.load_scalar(fx);
- crate::inline_asm::codegen_xgetbv(fx, xcr_no, ret);
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String(
+ "
+ xgetbv
+ // out = rdx << 32 | rax
+ shl rdx, 32
+ or rax, rdx
+ "
+ .to_string(),
+ )],
+ &[
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
+ value: xcr_no,
+ },
+ CInlineAsmOperand::Out {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+ late: true,
+ place: Some(ret),
+ },
+ CInlineAsmOperand::Out {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+ late: true,
+ place: None,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
}
"llvm.x86.sse3.ldu.dq" | "llvm.x86.avx.ldu.dq.256" => {
@@ -37,6 +73,103 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
ret.write_cvalue(fx, val);
}
+ "llvm.x86.avx2.gather.d.d"
+ | "llvm.x86.avx2.gather.d.q"
+ | "llvm.x86.avx2.gather.d.ps"
+ | "llvm.x86.avx2.gather.d.pd"
+ | "llvm.x86.avx2.gather.d.d.256"
+ | "llvm.x86.avx2.gather.d.q.256"
+ | "llvm.x86.avx2.gather.d.ps.256"
+ | "llvm.x86.avx2.gather.d.pd.256"
+ | "llvm.x86.avx2.gather.q.d"
+ | "llvm.x86.avx2.gather.q.q"
+ | "llvm.x86.avx2.gather.q.ps"
+ | "llvm.x86.avx2.gather.q.pd"
+ | "llvm.x86.avx2.gather.q.d.256"
+ | "llvm.x86.avx2.gather.q.q.256"
+ | "llvm.x86.avx2.gather.q.ps.256"
+ | "llvm.x86.avx2.gather.q.pd.256" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd&ig_expand=3819
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd&ig_expand=3821
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd&ig_expand=3822
+ // ...
+
+ intrinsic_args!(fx, args => (src, ptr, index, mask, scale); intrinsic);
+
+ let (src_lane_count, src_lane_ty) = src.layout().ty.simd_size_and_type(fx.tcx);
+ let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx);
+ let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+ let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+ assert_eq!(src_lane_ty, ret_lane_ty);
+ assert!(index_lane_ty.is_integral());
+ assert_eq!(src_lane_count, mask_lane_count);
+ assert_eq!(src_lane_count, ret_lane_count);
+
+ let lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap();
+ let index_lane_clif_ty = fx.clif_type(index_lane_ty).unwrap();
+ let mask_lane_clif_ty = fx.clif_type(mask_lane_ty).unwrap();
+ let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+ let ptr = ptr.load_scalar(fx);
+ let scale = scale.load_scalar(fx);
+ let scale = fx.bcx.ins().uextend(types::I64, scale);
+ for lane_idx in 0..std::cmp::min(src_lane_count, index_lane_count) {
+ let src_lane = src.value_lane(fx, lane_idx).load_scalar(fx);
+ let index_lane = index.value_lane(fx, lane_idx).load_scalar(fx);
+ let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+ let mask_lane =
+ fx.bcx.ins().bitcast(mask_lane_clif_ty.as_int(), MemFlags::new(), mask_lane);
+
+ let if_enabled = fx.bcx.create_block();
+ let if_disabled = fx.bcx.create_block();
+ let next = fx.bcx.create_block();
+ let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+ let mask_lane = match mask_lane_clif_ty {
+ types::I32 | types::F32 => {
+ fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64)
+ }
+ types::I64 | types::F64 => {
+ fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64)
+ }
+ _ => unreachable!(),
+ };
+ fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
+ fx.bcx.seal_block(if_enabled);
+ fx.bcx.seal_block(if_disabled);
+
+ fx.bcx.switch_to_block(if_enabled);
+ let index_lane = if index_lane_clif_ty != types::I64 {
+ fx.bcx.ins().sextend(types::I64, index_lane)
+ } else {
+ index_lane
+ };
+ let offset = fx.bcx.ins().imul(index_lane, scale);
+ let lane_ptr = fx.bcx.ins().iadd(ptr, offset);
+ let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), lane_ptr, 0);
+ fx.bcx.ins().jump(next, &[res]);
+
+ fx.bcx.switch_to_block(if_disabled);
+ fx.bcx.ins().jump(next, &[src_lane]);
+
+ fx.bcx.seal_block(next);
+ fx.bcx.switch_to_block(next);
+
+ fx.bcx.ins().nop();
+
+ ret.place_lane(fx, lane_idx)
+ .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+ }
+
+ for lane_idx in std::cmp::min(src_lane_count, index_lane_count)..ret_lane_count {
+ let zero_lane = fx.bcx.ins().iconst(mask_lane_clif_ty.as_int(), 0);
+ let zero_lane = fx.bcx.ins().bitcast(mask_lane_clif_ty, MemFlags::new(), zero_lane);
+ ret.place_lane(fx, lane_idx)
+ .write_cvalue(fx, CValue::by_val(zero_lane, ret_lane_layout));
+ }
+ }
+
"llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
let (x, y, kind) = match args {
[x, y, kind] => (x, y, kind),
@@ -241,16 +374,31 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
);
}
"llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
- let a = match args {
- [a] => a,
- _ => bug!("wrong number of args for intrinsic {intrinsic}"),
- };
- let a = codegen_operand(fx, a);
+ intrinsic_args!(fx, args => (a); intrinsic);
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
fx.bcx.ins().iabs(lane)
});
}
+ "llvm.x86.sse2.cvttps2dq" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32&ig_expand=2429
+ intrinsic_args!(fx, args => (a); intrinsic);
+ let a = a.load_scalar(fx);
+
+ // Using inline asm instead of fcvt_to_sint_sat as unrepresentable values are turned
+ // into 0x80000000 for which Cranelift doesn't have a native instruction.
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))],
+ &[CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ }],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
"llvm.x86.addcarry.32" | "llvm.x86.addcarry.64" => {
intrinsic_args!(fx, args => (c_in, a, b); intrinsic);
let c_in = c_in.load_scalar(fx);
@@ -332,9 +480,11 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
for out_lane_idx in 0..lane_count / 8 {
let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
- for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 {
+ for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 {
let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
+ let a_lane = fx.bcx.ins().uextend(types::I16, a_lane);
let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
+ let b_lane = fx.bcx.ins().uextend(types::I16, b_lane);
let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
@@ -405,12 +555,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
for out_lane_idx in 0..lane_count / 2 {
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
- let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0);
+ let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0);
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);
let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
- let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1);
+ let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1);
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);
@@ -565,14 +715,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
assert_eq!(lane_count * 2, ret_lane_count);
- let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16));
- let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16));
+ let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
+ let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
for idx in 0..lane_count {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
- let sat = fx.bcx.ins().umin(sat, max_i16);
+ let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -582,7 +732,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
for idx in 0..lane_count {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
- let sat = fx.bcx.ins().umin(sat, max_i16);
+ let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -609,8 +759,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
for idx in 0..lane_count {
let lane = a.value_lane(fx, idx).load_scalar(fx);
- let sat = fx.bcx.ins().umax(lane, min_u16);
- let sat = fx.bcx.ins().umin(sat, max_u16);
+ let sat = fx.bcx.ins().smax(lane, min_u16);
+ let sat = fx.bcx.ins().smin(sat, max_u16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -619,8 +769,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
for idx in 0..lane_count {
let lane = b.value_lane(fx, idx).load_scalar(fx);
- let sat = fx.bcx.ins().umax(lane, min_u16);
- let sat = fx.bcx.ins().umin(sat, max_u16);
+ let sat = fx.bcx.ins().smax(lane, min_u16);
+ let sat = fx.bcx.ins().smin(sat, max_u16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -641,14 +791,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
assert_eq!(lane_count * 2, ret_lane_count);
- let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16));
- let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16));
+ let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
+ let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
for idx in 0..lane_count / 2 {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
- let sat = fx.bcx.ins().umin(sat, max_i16);
+ let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -658,7 +808,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
for idx in 0..lane_count / 2 {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
- let sat = fx.bcx.ins().umin(sat, max_i16);
+ let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -668,7 +818,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
for idx in 0..lane_count / 2 {
let lane = a.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
- let sat = fx.bcx.ins().umin(sat, max_i16);
+ let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -678,7 +828,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
for idx in 0..lane_count / 2 {
let lane = b.value_lane(fx, idx).load_scalar(fx);
let sat = fx.bcx.ins().smax(lane, min_i16);
- let sat = fx.bcx.ins().umin(sat, max_i16);
+ let sat = fx.bcx.ins().smin(sat, max_i16);
let res = fx.bcx.ins().ireduce(types::I16, sat);
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -686,66 +836,489 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
}
}
- "llvm.x86.pclmulqdq" => {
- // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
- intrinsic_args!(fx, args => (a, b, imm8); intrinsic);
+ "llvm.x86.fma.vfmaddsub.ps"
+ | "llvm.x86.fma.vfmaddsub.pd"
+ | "llvm.x86.fma.vfmaddsub.ps.256"
+ | "llvm.x86.fma.vfmaddsub.pd.256" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
+ intrinsic_args!(fx, args => (a, b, c); intrinsic);
assert_eq!(a.layout(), b.layout());
+ assert_eq!(a.layout(), c.layout());
let layout = a.layout();
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
- assert_eq!(lane_ty, fx.tcx.types.i64);
- assert_eq!(ret_lane_ty, fx.tcx.types.i64);
- assert_eq!(lane_count, 2);
- assert_eq!(ret_lane_count, 2);
+ assert!(lane_ty.is_floating_point());
+ assert!(ret_lane_ty.is_floating_point());
+ assert_eq!(lane_count, ret_lane_count);
+ let ret_lane_layout = fx.layout_of(ret_lane_ty);
- let imm8 = imm8.load_scalar(fx);
+ for idx in 0..lane_count {
+ let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+ let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+ let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+ let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+ let res = if idx & 1 == 0 {
+ fx.bcx.ins().fsub(mul, c_lane)
+ } else {
+ fx.bcx.ins().fadd(mul, c_lane)
+ };
- let control0 = fx.bcx.ins().band_imm(imm8, 0b0000_0001);
- let a_lane0 = a.value_lane(fx, 0).load_scalar(fx);
- let a_lane1 = a.value_lane(fx, 1).load_scalar(fx);
- let temp1 = fx.bcx.ins().select(control0, a_lane1, a_lane0);
+ let res_lane = CValue::by_val(res, ret_lane_layout);
+ ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+ }
+ }
- let control4 = fx.bcx.ins().band_imm(imm8, 0b0001_0000);
- let b_lane0 = b.value_lane(fx, 0).load_scalar(fx);
- let b_lane1 = b.value_lane(fx, 1).load_scalar(fx);
- let temp2 = fx.bcx.ins().select(control4, b_lane1, b_lane0);
+ "llvm.x86.fma.vfmsubadd.ps"
+ | "llvm.x86.fma.vfmsubadd.pd"
+ | "llvm.x86.fma.vfmsubadd.ps.256"
+ | "llvm.x86.fma.vfmsubadd.pd.256" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
+ intrinsic_args!(fx, args => (a, b, c); intrinsic);
- fn extract_bit(fx: &mut FunctionCx<'_, '_, '_>, val: Value, bit: i64) -> Value {
- let tmp = fx.bcx.ins().ushr_imm(val, bit);
- fx.bcx.ins().band_imm(tmp, 1)
- }
+ assert_eq!(a.layout(), b.layout());
+ assert_eq!(a.layout(), c.layout());
+ let layout = a.layout();
- let mut res1 = fx.bcx.ins().iconst(types::I64, 0);
- for i in 0..=63 {
- let x = extract_bit(fx, temp1, 0);
- let y = extract_bit(fx, temp2, i);
- let mut temp = fx.bcx.ins().band(x, y);
- for j in 1..=i {
- let x = extract_bit(fx, temp1, j);
- let y = extract_bit(fx, temp2, i - j);
- let z = fx.bcx.ins().band(x, y);
- temp = fx.bcx.ins().bxor(temp, z);
- }
- let temp = fx.bcx.ins().ishl_imm(temp, i);
- res1 = fx.bcx.ins().bor(res1, temp);
+ let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+ let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+ assert!(lane_ty.is_floating_point());
+ assert!(ret_lane_ty.is_floating_point());
+ assert_eq!(lane_count, ret_lane_count);
+ let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+ for idx in 0..lane_count {
+ let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+ let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+ let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+ let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+ let res = if idx & 1 == 0 {
+ fx.bcx.ins().fadd(mul, c_lane)
+ } else {
+ fx.bcx.ins().fsub(mul, c_lane)
+ };
+
+ let res_lane = CValue::by_val(res, ret_lane_layout);
+ ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
- ret.place_lane(fx, 0).to_ptr().store(fx, res1, MemFlags::trusted());
-
- let mut res2 = fx.bcx.ins().iconst(types::I64, 0);
- for i in 64..=127 {
- let mut temp = fx.bcx.ins().iconst(types::I64, 0);
- for j in i - 63..=63 {
- let x = extract_bit(fx, temp1, j);
- let y = extract_bit(fx, temp2, i - j);
- let z = fx.bcx.ins().band(x, y);
- temp = fx.bcx.ins().bxor(temp, z);
- }
- let temp = fx.bcx.ins().ishl_imm(temp, i);
- res2 = fx.bcx.ins().bor(res2, temp);
+ }
+
+ "llvm.x86.fma.vfnmadd.ps"
+ | "llvm.x86.fma.vfnmadd.pd"
+ | "llvm.x86.fma.vfnmadd.ps.256"
+ | "llvm.x86.fma.vfnmadd.pd.256" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
+ intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+ assert_eq!(a.layout(), b.layout());
+ assert_eq!(a.layout(), c.layout());
+ let layout = a.layout();
+
+ let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+ let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+ assert!(lane_ty.is_floating_point());
+ assert!(ret_lane_ty.is_floating_point());
+ assert_eq!(lane_count, ret_lane_count);
+ let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+ for idx in 0..lane_count {
+ let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+ let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+ let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+ let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+ let neg_mul = fx.bcx.ins().fneg(mul);
+ let res = fx.bcx.ins().fadd(neg_mul, c_lane);
+
+ let res_lane = CValue::by_val(res, ret_lane_layout);
+ ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
}
- ret.place_lane(fx, 1).to_ptr().store(fx, res2, MemFlags::trusted());
+ }
+
+ "llvm.x86.sse42.pcmpestri128" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
+ intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let la = la.load_scalar(fx);
+ let b = b.load_scalar(fx);
+ let lb = lb.load_scalar(fx);
+
+ let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4])
+ {
+ imm8
+ } else {
+ fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestri` is not a constant");
+ };
+
+ let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String(format!("pcmpestri xmm0, xmm1, {imm8}"))],
+ &[
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ value: a,
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ value: b,
+ },
+ // Implicit argument to the pcmpestri intrinsic
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+ value: la,
+ },
+ // Implicit argument to the pcmpestri intrinsic
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+ value: lb,
+ },
+ // Implicit result of the pcmpestri intrinsic
+ CInlineAsmOperand::Out {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
+ late: true,
+ place: Some(ret),
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.sse42.pcmpestrm128" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm&ig_expand=940
+ intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let la = la.load_scalar(fx);
+ let b = b.load_scalar(fx);
+ let lb = lb.load_scalar(fx);
+
+ let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4])
+ {
+ imm8
+ } else {
+ fx.tcx.sess.span_fatal(span, "Index argument for `_mm_cmpestrm` is not a constant");
+ };
+
+ let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String(format!("pcmpestrm xmm0, xmm1, {imm8}"))],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ value: b,
+ },
+ // Implicit argument to the pcmpestri intrinsic
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+ value: la,
+ },
+ // Implicit argument to the pcmpestri intrinsic
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+ value: lb,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.pclmulqdq" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
+ intrinsic_args!(fx, args => (a, b, _imm8); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let b = b.load_scalar(fx);
+
+ let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[2])
+ {
+ imm8
+ } else {
+ fx.tcx.sess.span_fatal(
+ span,
+ "Index argument for `_mm_clmulepi64_si128` is not a constant",
+ );
+ };
+
+ let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String(format!("pclmulqdq xmm0, xmm1, {imm8}"))],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ value: b,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.aesni.aeskeygenassist" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261
+ intrinsic_args!(fx, args => (a, _imm8); intrinsic);
+
+ let a = a.load_scalar(fx);
+
+ let imm8 = if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1])
+ {
+ imm8
+ } else {
+ fx.tcx.sess.span_fatal(
+ span,
+ "Index argument for `_mm_aeskeygenassist_si128` is not a constant",
+ );
+ };
+
+ let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", imm8));
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))],
+ &[CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ }],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.aesni.aesimc" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260
+ intrinsic_args!(fx, args => (a); intrinsic);
+
+ let a = a.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())],
+ &[CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ }],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.aesni.aesenc" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252
+ intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let round_key = round_key.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ value: round_key,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.aesni.aesenclast" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257
+ intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let round_key = round_key.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ value: round_key,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.aesni.aesdec" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242
+ intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let round_key = round_key.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ value: round_key,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.aesni.aesdeclast" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247
+ intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let round_key = round_key.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ value: round_key,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.sha256rnds2" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977
+ intrinsic_args!(fx, args => (a, b, k); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let b = b.load_scalar(fx);
+ let k = k.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("sha256rnds2 xmm1, xmm2".to_string())],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+ value: b,
+ },
+ // Implicit argument to the sha256rnds2 instruction
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+ value: k,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.sha256msg1" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32&ig_expand=5975
+ intrinsic_args!(fx, args => (a, b); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let b = b.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("sha256msg1 xmm1, xmm2".to_string())],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+ value: b,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
+ }
+
+ "llvm.x86.sha256msg2" => {
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32&ig_expand=5976
+ intrinsic_args!(fx, args => (a, b); intrinsic);
+
+ let a = a.load_scalar(fx);
+ let b = b.load_scalar(fx);
+
+ codegen_inline_asm_inner(
+ fx,
+ &[InlineAsmTemplatePiece::String("sha256msg2 xmm1, xmm2".to_string())],
+ &[
+ CInlineAsmOperand::InOut {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+ _late: true,
+ in_value: a,
+ out_place: Some(ret),
+ },
+ CInlineAsmOperand::In {
+ reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+ value: b,
+ },
+ ],
+ InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+ );
}
"llvm.x86.avx.ptestz.256" => {
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index bfeeb117f..68126f124 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -487,13 +487,12 @@ fn codegen_regular_intrinsic_call<'tcx>(
let layout = fx.layout_of(generic_args.type_at(0));
// Note: Can't use is_unsized here as truly unsized types need to take the fixed size
// branch
- let size = if let Abi::ScalarPair(_, _) = ptr.layout().abi {
- let (_ptr, info) = ptr.load_scalar_pair(fx);
- let (size, _align) = crate::unsize::size_and_align_of_dst(fx, layout, info);
- size
+ let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi {
+ Some(ptr.load_scalar_pair(fx).1)
} else {
- fx.bcx.ins().iconst(fx.pointer_type, layout.size.bytes() as i64)
+ None
};
+ let (size, _align) = crate::unsize::size_and_align_of(fx, layout, meta);
ret.write_cvalue(fx, CValue::by_val(size, usize_layout));
}
sym::min_align_of_val => {
@@ -502,13 +501,12 @@ fn codegen_regular_intrinsic_call<'tcx>(
let layout = fx.layout_of(generic_args.type_at(0));
// Note: Can't use is_unsized here as truly unsized types need to take the fixed size
// branch
- let align = if let Abi::ScalarPair(_, _) = ptr.layout().abi {
- let (_ptr, info) = ptr.load_scalar_pair(fx);
- let (_size, align) = crate::unsize::size_and_align_of_dst(fx, layout, info);
- align
+ let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi {
+ Some(ptr.load_scalar_pair(fx).1)
} else {
- fx.bcx.ins().iconst(fx.pointer_type, layout.align.abi.bytes() as i64)
+ None
};
+ let (_size, align) = crate::unsize::size_and_align_of(fx, layout, meta);
ret.write_cvalue(fx, CValue::by_val(align, usize_layout));
}
@@ -688,7 +686,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
}
})
});
- crate::base::codegen_panic_nounwind(fx, &msg_str, source_info);
+ crate::base::codegen_panic_nounwind(fx, &msg_str, Some(source_info.span));
return;
}
}
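
Note on the hunk above: size_of_val and min_align_of_val now share a single unsize::size_and_align_of helper. When the pointer carries no metadata the statically known size and alignment are used; otherwise the helper derives them from the metadata (length for slices and str, vtable for trait objects). A simplified sketch of the slice case with plain integers standing in for the Cranelift values; the vtable path is omitted and the parameter names are illustrative:

    // A thin pointer uses the layout's fixed size; a slice pointer multiplies the
    // element size by the length carried in the pointer metadata.
    fn size_of_val(fixed_size: u64, elem_size: u64, meta_len: Option<u64>) -> u64 {
        match meta_len {
            None => fixed_size,
            Some(len) => elem_size * len,
        }
    }

    fn main() {
        assert_eq!(size_of_val(4, 4, None), 4);      // e.g. u32
        assert_eq!(size_of_val(0, 4, Some(16)), 64); // e.g. [u32] of length 16 (fixed_size unused)
    }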
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index ea137c4ca..fe4f073f7 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -1,7 +1,6 @@
//! Codegen `extern "platform-intrinsic"` intrinsics.
-use rustc_middle::ty::GenericArgsRef;
-use rustc_span::Symbol;
+use cranelift_codegen::ir::immediates::Offset32;
use rustc_target::abi::Endian;
use super::*;
@@ -282,11 +281,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
fx.tcx.sess.span_fatal(span, "Index argument for `simd_insert` is not a constant");
};
- let idx = idx_const
- .try_to_bits(Size::from_bytes(4 /* u32*/))
- .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
+ let idx: u32 = idx_const
+ .try_to_u32()
+ .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx);
- if idx >= lane_count.into() {
+ if u64::from(idx) >= lane_count {
fx.tcx.sess.span_fatal(
fx.mir.span,
format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count),
@@ -331,10 +330,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
};
let idx = idx_const
- .try_to_bits(Size::from_bytes(4 /* u32*/))
- .unwrap_or_else(|| panic!("kind not scalar: {:?}", idx_const));
+ .try_to_u32()
+ .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx);
- if idx >= lane_count.into() {
+ if u64::from(idx) >= lane_count {
fx.tcx.sess.span_fatal(
fx.mir.span,
format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count),
@@ -1008,8 +1007,57 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
}
}
+ sym::simd_masked_load => {
+ intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
+
+ let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+ let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+ let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+ assert_eq!(val_lane_count, mask_lane_count);
+ assert_eq!(val_lane_count, ret_lane_count);
+
+ let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+ let ret_lane_layout = fx.layout_of(ret_lane_ty);
+ let ptr_val = ptr.load_scalar(fx);
+
+ for lane_idx in 0..ret_lane_count {
+ let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+ let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+ let if_enabled = fx.bcx.create_block();
+ let if_disabled = fx.bcx.create_block();
+ let next = fx.bcx.create_block();
+ let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+ fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
+ fx.bcx.seal_block(if_enabled);
+ fx.bcx.seal_block(if_disabled);
+
+ fx.bcx.switch_to_block(if_enabled);
+ let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32;
+ let res = fx.bcx.ins().load(
+ lane_clif_ty,
+ MemFlags::trusted(),
+ ptr_val,
+ Offset32::new(offset),
+ );
+ fx.bcx.ins().jump(next, &[res]);
+
+ fx.bcx.switch_to_block(if_disabled);
+ fx.bcx.ins().jump(next, &[val_lane]);
+
+ fx.bcx.seal_block(next);
+ fx.bcx.switch_to_block(next);
+
+ fx.bcx.ins().nop();
+
+ ret.place_lane(fx, lane_idx)
+ .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+ }
+ }
+
sym::simd_scatter => {
- intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+ intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
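The per-lane `brif`/`jump` lowering added for `simd_masked_load` above amounts to a scalar select-and-load per lane. A minimal plain-Rust model of that behaviour (the `masked_load_model` name is illustrative, not part of the patch; any non-zero mask lane counts as enabled, just as the `brif` treats it):

    fn masked_load_model<T: Copy, const N: usize>(
        mask: [i8; N],        // one mask lane per element; non-zero = enabled
        ptr: *const T,        // base pointer; enabled lanes must be readable
        fallback: [T; N],     // pass-through values for disabled lanes
    ) -> [T; N] {
        let mut out = fallback;
        for i in 0..N {
            if mask[i] != 0 {
                // mirrors the `load` at offset `lane_idx * lane_clif_ty.bytes()`
                out[i] = unsafe { *ptr.add(i) };
            }
        }
        out
    }
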
diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
index da84e54a9..196418023 100644
--- a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
+++ b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
@@ -58,11 +58,10 @@
use std::fmt;
use std::io::Write;
-use cranelift_codegen::{
- entity::SecondaryMap,
- ir::entities::AnyEntity,
- write::{FuncWriter, PlainWriter},
-};
+use cranelift_codegen::entity::SecondaryMap;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::Fact;
+use cranelift_codegen::write::{FuncWriter, PlainWriter};
use rustc_middle::ty::layout::FnAbiOf;
use rustc_middle::ty::print::with_no_trimmed_paths;
use rustc_session::config::{OutputFilenames, OutputType};
@@ -155,8 +154,13 @@ impl FuncWriter for &'_ CommentWriter {
_func: &Function,
entity: AnyEntity,
value: &dyn fmt::Display,
+ maybe_fact: Option<&Fact>,
) -> fmt::Result {
- write!(w, " {} = {}", entity, value)?;
+ if let Some(fact) = maybe_fact {
+ write!(w, " {} ! {} = {}", entity, fact, value)?;
+ } else {
+ write!(w, " {} = {}", entity, value)?;
+ }
if let Some(comment) = self.entity_comments.get(&entity) {
writeln!(w, " ; {}", comment.replace('\n', "\n; "))
@@ -227,9 +231,8 @@ pub(crate) fn write_ir_file(
let res = std::fs::File::create(clif_file_name).and_then(|mut file| write(&mut file));
if let Err(err) = res {
// Using early_warn as no Session is available here
- let handler = rustc_session::EarlyErrorHandler::new(
- rustc_session::config::ErrorOutputType::default(),
- );
+ let handler =
+ rustc_session::EarlyDiagCtxt::new(rustc_session::config::ErrorOutputType::default());
handler.early_warn(format!("error writing ir file: {}", err));
}
}
diff --git a/compiler/rustc_codegen_cranelift/src/unsize.rs b/compiler/rustc_codegen_cranelift/src/unsize.rs
index c6133f2b3..f777e1137 100644
--- a/compiler/rustc_codegen_cranelift/src/unsize.rs
+++ b/compiler/rustc_codegen_cranelift/src/unsize.rs
@@ -2,6 +2,9 @@
//!
//! [`PointerCoercion::Unsize`]: `rustc_middle::ty::adjustment::PointerCoercion::Unsize`
+use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
+
+use crate::base::codegen_panic_nounwind;
use crate::prelude::*;
// Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/base.rs#L159-L307
@@ -187,63 +190,113 @@ pub(crate) fn coerce_dyn_star<'tcx>(
// Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/glue.rs
-pub(crate) fn size_and_align_of_dst<'tcx>(
+pub(crate) fn size_and_align_of<'tcx>(
fx: &mut FunctionCx<'_, '_, 'tcx>,
layout: TyAndLayout<'tcx>,
- info: Value,
+ info: Option<Value>,
) -> (Value, Value) {
- assert!(layout.is_unsized() || layout.abi == Abi::Uninhabited);
- match layout.ty.kind() {
+ if layout.is_sized() {
+ return (
+ fx.bcx.ins().iconst(fx.pointer_type, layout.size.bytes() as i64),
+ fx.bcx.ins().iconst(fx.pointer_type, layout.align.abi.bytes() as i64),
+ );
+ }
+
+ let ty = layout.ty;
+ match ty.kind() {
ty::Dynamic(..) => {
// load size/align from vtable
- (crate::vtable::size_of_obj(fx, info), crate::vtable::min_align_of_obj(fx, info))
+ (
+ crate::vtable::size_of_obj(fx, info.unwrap()),
+ crate::vtable::min_align_of_obj(fx, info.unwrap()),
+ )
}
ty::Slice(_) | ty::Str => {
let unit = layout.field(fx, 0);
// The info in this case is the length of the str, so the size is that
// times the unit size.
(
- fx.bcx.ins().imul_imm(info, unit.size.bytes() as i64),
+ fx.bcx.ins().imul_imm(info.unwrap(), unit.size.bytes() as i64),
fx.bcx.ins().iconst(fx.pointer_type, unit.align.abi.bytes() as i64),
)
}
- _ => {
+ ty::Foreign(_) => {
+ let trap_block = fx.bcx.create_block();
+ let true_ = fx.bcx.ins().iconst(types::I8, 1);
+ let next_block = fx.bcx.create_block();
+ fx.bcx.ins().brif(true_, trap_block, &[], next_block, &[]);
+ fx.bcx.seal_block(trap_block);
+ fx.bcx.seal_block(next_block);
+ fx.bcx.switch_to_block(trap_block);
+
+ // `extern` type. We cannot compute the size, so panic.
+ let msg_str = with_no_visible_paths!({
+ with_no_trimmed_paths!({
+ format!("attempted to compute the size or alignment of extern type `{ty}`")
+ })
+ });
+
+ codegen_panic_nounwind(fx, &msg_str, None);
+
+ fx.bcx.switch_to_block(next_block);
+
+ // This function does not return so we can now return whatever we want.
+ let size = fx.bcx.ins().iconst(fx.pointer_type, 42);
+ let align = fx.bcx.ins().iconst(fx.pointer_type, 42);
+ (size, align)
+ }
+ ty::Adt(..) | ty::Tuple(..) => {
// First get the size of all statically known fields.
// Don't use size_of because it also rounds up to alignment, which we
// want to avoid, as the unsized field's alignment could be smaller.
assert!(!layout.ty.is_simd());
let i = layout.fields.count() - 1;
- let sized_size = layout.fields.offset(i).bytes();
+ let unsized_offset_unadjusted = layout.fields.offset(i).bytes();
+ let unsized_offset_unadjusted =
+ fx.bcx.ins().iconst(fx.pointer_type, unsized_offset_unadjusted as i64);
let sized_align = layout.align.abi.bytes();
let sized_align = fx.bcx.ins().iconst(fx.pointer_type, sized_align as i64);
// Recurse to get the size of the dynamically sized field (must be
// the last field).
let field_layout = layout.field(fx, i);
- let (unsized_size, mut unsized_align) = size_and_align_of_dst(fx, field_layout, info);
-
- // FIXME (#26403, #27023): We should be adding padding
- // to `sized_size` (to accommodate the `unsized_align`
- // required of the unsized field that follows) before
- // summing it with `sized_size`. (Note that since #26403
- // is unfixed, we do not yet add the necessary padding
- // here. But this is where the add would go.)
-
- // Return the sum of sizes and max of aligns.
- let size = fx.bcx.ins().iadd_imm(unsized_size, sized_size as i64);
-
- // Packed types ignore the alignment of their fields.
- if let ty::Adt(def, _) = layout.ty.kind() {
- if def.repr().packed() {
- unsized_align = sized_align;
+ let (unsized_size, mut unsized_align) = size_and_align_of(fx, field_layout, info);
+
+ // # First compute the dynamic alignment
+
+ // For packed types, we need to cap the alignment.
+ if let ty::Adt(def, _) = ty.kind() {
+ if let Some(packed) = def.repr().pack {
+ if packed.bytes() == 1 {
+ // We know this will be capped to 1.
+ unsized_align = fx.bcx.ins().iconst(fx.pointer_type, 1);
+ } else {
+ // We have to dynamically compute `min(unsized_align, packed)`.
+ let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64);
+ let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed);
+ unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed);
+ }
}
}
// Choose max of two known alignments (combined value must
// be aligned according to more restrictive of the two).
let cmp = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, sized_align, unsized_align);
- let align = fx.bcx.ins().select(cmp, sized_align, unsized_align);
+ let full_align = fx.bcx.ins().select(cmp, sized_align, unsized_align);
+
+ // # Then compute the dynamic size
+
+ // The full formula for the size would be:
+ // let unsized_offset_adjusted = unsized_offset_unadjusted.align_to(unsized_align);
+ // let full_size = (unsized_offset_adjusted + unsized_size).align_to(full_align);
+ // However, `unsized_size` is a multiple of `unsized_align`.
+ // Therefore, we can equivalently do the `align_to(unsized_align)` *after* adding `unsized_size`:
+ // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(unsized_align).align_to(full_align);
+ // Furthermore, `full_align >= unsized_align`, and therefore we only need to do:
+ // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(full_align);
+
+ let full_size = fx.bcx.ins().iadd(unsized_offset_unadjusted, unsized_size);
// Issue #27023: must add any necessary padding to `size`
// (to make it a multiple of `align`) before returning it.
@@ -255,12 +308,13 @@ pub(crate) fn size_and_align_of_dst<'tcx>(
// emulated via the semi-standard fast bit trick:
//
// `(size + (align-1)) & -align`
- let addend = fx.bcx.ins().iadd_imm(align, -1);
- let add = fx.bcx.ins().iadd(size, addend);
- let neg = fx.bcx.ins().ineg(align);
- let size = fx.bcx.ins().band(add, neg);
+ let addend = fx.bcx.ins().iadd_imm(full_align, -1);
+ let add = fx.bcx.ins().iadd(full_size, addend);
+ let neg = fx.bcx.ins().ineg(full_align);
+ let full_size = fx.bcx.ins().band(add, neg);
- (size, align)
+ (full_size, full_align)
}
+ _ => bug!("size_and_align_of_dst: {ty} not supported"),
}
}
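The `Adt`/`Tuple` arm above emits exactly the arithmetic its comments spell out: take the unadjusted offset of the trailing field, add its dynamic size, and round up to the larger of the two alignments with the `(size + (align-1)) & -align` trick. A minimal sketch in plain Rust, where u64 values stand in for the pointer-sized CLIF values and the repr(packed) capping of `unsized_align` is left out:

    fn dst_size_align_model(
        unsized_offset_unadjusted: u64, // static offset of the unsized tail field
        sized_align: u64,               // static alignment of the sized prefix
        unsized_size: u64,              // dynamic size of the tail (from the metadata)
        unsized_align: u64,             // dynamic alignment of the tail
    ) -> (u64, u64) {
        // max of the two alignments (the `icmp`/`select` pair)
        let full_align = sized_align.max(unsized_align);
        // sum, then align up: `(x + (a - 1)) & -a` (the `iadd_imm`/`iadd`/`ineg`/`band` sequence)
        let full_size = (unsized_offset_unadjusted + unsized_size + (full_align - 1))
            & full_align.wrapping_neg();
        (full_size, full_align)
    }
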
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index 21ad2a835..567a5669d 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -20,34 +20,36 @@ fn codegen_field<'tcx>(
(base.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()), field_layout)
};
- if let Some(extra) = extra {
- if field_layout.is_sized() {
- return simple(fx);
- }
- match field_layout.ty.kind() {
- ty::Slice(..) | ty::Str | ty::Foreign(..) => simple(fx),
- ty::Adt(def, _) if def.repr().packed() => {
- assert_eq!(layout.align.abi.bytes(), 1);
- simple(fx)
- }
- _ => {
- // We have to align the offset for DST's
- let unaligned_offset = field_offset.bytes();
- let (_, unsized_align) =
- crate::unsize::size_and_align_of_dst(fx, field_layout, extra);
+ if field_layout.is_sized() {
+ return simple(fx);
+ }
+ match field_layout.ty.kind() {
+ ty::Slice(..) | ty::Str => simple(fx),
+ _ => {
+ let unaligned_offset = field_offset.bytes();
- let one = fx.bcx.ins().iconst(fx.pointer_type, 1);
- let align_sub_1 = fx.bcx.ins().isub(unsized_align, one);
- let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64);
- let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
- let and_rhs = fx.bcx.ins().isub(zero, unsized_align);
- let offset = fx.bcx.ins().band(and_lhs, and_rhs);
+ // Get the alignment of the field
+ let (_, mut unsized_align) = crate::unsize::size_and_align_of(fx, field_layout, extra);
- (base.offset_value(fx, offset), field_layout)
+ // For packed types, we need to cap alignment.
+ if let ty::Adt(def, _) = layout.ty.kind() {
+ if let Some(packed) = def.repr().pack {
+ let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64);
+ let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed);
+ unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed);
+ }
}
+
+ // Bump the unaligned offset up to the appropriate alignment
+ let one = fx.bcx.ins().iconst(fx.pointer_type, 1);
+ let align_sub_1 = fx.bcx.ins().isub(unsized_align, one);
+ let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64);
+ let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
+ let and_rhs = fx.bcx.ins().isub(zero, unsized_align);
+ let offset = fx.bcx.ins().band(and_lhs, and_rhs);
+
+ (base.offset_value(fx, offset), field_layout)
}
- } else {
- simple(fx)
}
}
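The rewritten `codegen_field` path above performs the analogous adjustment for a projected unsized tail field: cap the dynamic alignment at the parent's repr(packed) limit, then bump the statically known field offset up to that alignment. A plain-Rust sketch of just that arithmetic (the function name is illustrative; `packed` is the optional repr(packed) value in bytes):

    fn unsized_field_offset_model(
        unaligned_offset: u64,
        mut unsized_align: u64,
        packed: Option<u64>,
    ) -> u64 {
        if let Some(pack) = packed {
            // `select(icmp ult(unsized_align, packed), unsized_align, packed)` is a min
            unsized_align = unsized_align.min(pack);
        }
        // same `(offset + align - 1) & -align` pattern as the `band` of `and_lhs` and `and_rhs`
        (unaligned_offset + unsized_align - 1) & unsized_align.wrapping_neg()
    }
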
@@ -329,7 +331,13 @@ impl<'tcx> CValue<'tcx> {
let msb = fx.bcx.ins().iconst(types::I64, (const_val >> 64) as u64 as i64);
fx.bcx.ins().iconcat(lsb, msb)
}
- ty::Bool | ty::Char | ty::Uint(_) | ty::Int(_) | ty::Ref(..) | ty::RawPtr(..) => {
+ ty::Bool
+ | ty::Char
+ | ty::Uint(_)
+ | ty::Int(_)
+ | ty::Ref(..)
+ | ty::RawPtr(..)
+ | ty::FnPtr(..) => {
let raw_val = const_val.size().truncate(const_val.to_bits(layout.size).unwrap());
fx.bcx.ins().iconst(clif_ty, raw_val as i64)
}
@@ -725,13 +733,8 @@ impl<'tcx> CPlace<'tcx> {
};
let (field_ptr, field_layout) = codegen_field(fx, base, extra, layout, field);
- if field_layout.is_unsized() {
- if let ty::Foreign(_) = field_layout.ty.kind() {
- assert!(extra.is_none());
- CPlace::for_ptr(field_ptr, field_layout)
- } else {
- CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
- }
+ if has_ptr_meta(fx.tcx, field_layout.ty) {
+ CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
} else {
CPlace::for_ptr(field_ptr, field_layout)
}
@@ -971,6 +974,32 @@ pub(crate) fn assert_assignable<'tcx>(
}
}
}
+ (&ty::Coroutine(def_id_a, args_a, mov_a), &ty::Coroutine(def_id_b, args_b, mov_b))
+ if def_id_a == def_id_b && mov_a == mov_b =>
+ {
+ let mut types_a = args_a.types();
+ let mut types_b = args_b.types();
+ loop {
+ match (types_a.next(), types_b.next()) {
+ (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+ (None, None) => return,
+ (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+ }
+ }
+ }
+ (&ty::CoroutineWitness(def_id_a, args_a), &ty::CoroutineWitness(def_id_b, args_b))
+ if def_id_a == def_id_b =>
+ {
+ let mut types_a = args_a.types();
+ let mut types_b = args_b.types();
+ loop {
+ match (types_a.next(), types_b.next()) {
+ (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+ (None, None) => return,
+ (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+ }
+ }
+ }
(ty::Param(_), _) | (_, ty::Param(_)) if fx.tcx.sess.opts.unstable_opts.polymorphize => {
// No way to check if it is correct or not with polymorphization enabled
}